bzr-2.7.0/.rsyncexclude0000644000000000000000000000031310340517326013152 0ustar 00000000000000*.pyc *.pyo *~ # arch can bite me {arch} .arch-ids ,,* ++* /doc/*.html *.tmp bzr-test.log [#]*# .#* testrev.* /tmp # do want this after all + CHANGELOG /build test*.tmp .*.swp *.orig .*.orig .bzr-shelf* bzr-2.7.0/.testr.conf0000644000000000000000000000017611503642232012533 0ustar 00000000000000[DEFAULT] test_command=./bzr selftest --subunit $IDOPTION $LISTOPT test_id_option=--load-list $IDFILE test_list_option=--list bzr-2.7.0/BRANCH.TODO0000644000000000000000000000022611416163030012062 0ustar 00000000000000# This file is for listing TODOs for branches that are being worked on. # It should ALWAYS be empty in the mainline or in integration branches. # # bzr-2.7.0/COPYING.txt0000644000000000000000000004310310463160640012315 0ustar 00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. 
The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. 
But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. 
Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. 
If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. bzr-2.7.0/INSTALL0000644000000000000000000000267212017747302011506 0ustar 00000000000000bzr install instructions ************************ Dependencies ------------ bzr requires Python 2.6 or newer. If you wish to access branches over sftp, you will need paramiko and pycrypto: http://www.lag.net/paramiko/ bzr can optionally use compiled versions of some parts of the code for increased speed. When installing bzr you need the ability to build C extensions. Some GNU/Linux distributions package the necessary headers separately from the main Python package. This package is probably named something like python-dev or python-devel. FreeBSD, Windows, source-based GNU/Linux distributions, and possibly other operating systems, have the required files installed by default. If you are installing bzr from a bzr branch rather than a release tarball, then you should also have the Pyrex package installed. This is not necessary for release tarballs as they already contain the C files that Pyrex is needed to create. http://www.cosc.canterbury.ac.nz/greg.ewing/python/Pyrex/ Installation ------------ When upgrading using setup.py, it is recommended that you first delete the bzrlib directory from the install target. To install bzr as a user, run python setup.py install --home ~ To install system-wide, run (as root) python setup.py install For more information on installation, see or write to bazaar@lists.canonical.com, or ask a question at . bzr-2.7.0/MANIFEST.in0000644000000000000000000000016611323371553012207 0ustar 00000000000000include bzr README setup.py recursive-include bzrlib *.py *.pyx *.pxd *.txt *.c *.h recursive-include tools *.py *.sh bzr-2.7.0/Makefile0000644000000000000000000003752012653717535012130 0ustar 00000000000000# Copyright (C) 2005-2012, 2016 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # A relatively simple Makefile to assist in building parts of bzr. 
Mostly for # building documentation, etc. ### Core Stuff ### PYTHON=python PYTHON24=python24 PYTHON25=python25 PYTHON26=python26 BZR_TARGET=release PLUGIN_TARGET=plugin-release PYTHON_BUILDFLAGS= .PHONY: all clean realclean extensions pyflakes api-docs check-nodocs check all: extensions extensions: @echo "building extension modules." $(PYTHON) setup.py build_ext -i $(PYTHON_BUILDFLAGS) check: docs check-nodocs check-nodocs: extensions set -e # Generate a stream for PQM to watch. -$(RM) -f selftest.log echo `date` ": selftest starts" 1>&2 $(PYTHON) -Werror -Wignore::ImportWarning -O ./bzr selftest -Oselftest.timeout=120 \ --subunit $(tests) | tee selftest.log echo `date` ": selftest ends" 1>&2 # An empty log file should catch errors in the $(PYTHON) # command above (the '|' swallow any errors since 'make' # sees the 'tee' exit code for the whole line if [ ! -s selftest.log ] ; then exit 1 ; fi # Check that there were no errors reported. subunit-stats < selftest.log # Run Python style checker (apt-get install pyflakes) # # Note that at present this gives many false warnings, because it doesn't # know about identifiers loaded through lazy_import. pyflakes: pyflakes bzrlib pyflakes-nounused: # There are many of these warnings at the moment and they're not a # high priority to fix pyflakes bzrlib | grep -v ' imported but unused' clean: $(PYTHON) setup.py clean -find . -name "*.pyc" -o -name "*.pyo" -o -name "*.so" | xargs rm -f realclean: clean # Remove files which are autogenerated but included by the tarball. rm -f bzrlib/*_pyx.c rm -f bzrlib/_simple_set_pyx.h bzrlib/_simple_set_pyx_api.h # Build API documentation docfiles = bzr bzrlib api-docs: mkdir -p api/html pydoctor --make-html --docformat='restructuredtext' --html-output=api/html $(docfiles) # build tags for emacs and vim TAGS: ctags -R -e bzrlib tags: ctags -R bzrlib # these are treated as phony so they'll always be rebuilt - it's pretty quick .PHONY: TAGS tags ### Documentation ### # Default to plain documentation for maximum backwards compatibility. # (Post 2.0, the defaults will most likely be Sphinx-style instead.) docs: docs-plain clean-docs: clean-plain html-docs: html-plain ### Man-page Documentation ### MAN_DEPENDENCIES = bzrlib/builtins.py \ $(wildcard bzrlib/*.py) \ $(wildcard bzrlib/*/*.py) \ tools/generate_docs.py \ $(wildcard $(addsuffix /*.txt, bzrlib/help_topics/en)) MAN_PAGES = man1/bzr.1 man1/bzr.1: $(MAN_DEPENDENCIES) $(PYTHON) tools/generate_docs.py -o $@ man ### Sphinx-style Documentation ### # Build the documentation. To keep the dependencies down to a minimum # for distro packagers, we only build the html documentation by default. # Sphinx 0.6 or later is preferred for the best rendering, though # Sphinx 0.4 or later should work. See http://sphinx.pocoo.org/index.html # for installation instructions. 
docs-sphinx: html-sphinx # Clean out generated documentation clean-sphinx: cd doc/en && make clean cd doc/es && make clean cd doc/ja && make clean cd doc/ru && make clean cd doc/developers && make clean SPHINX_DEPENDENCIES = \ doc/en/release-notes/index.txt \ doc/en/user-reference/index.txt \ doc/es/Makefile \ doc/es/make.bat \ doc/ja/Makefile \ doc/ja/make.bat \ doc/ru/Makefile \ doc/ru/make.bat \ doc/developers/Makefile \ doc/developers/make.bat NEWS_FILES = $(wildcard doc/en/release-notes/bzr-*.txt) doc/en/user-reference/index.txt: $(MAN_DEPENDENCIES) $(PYTHON) tools/generate_docs.py -o $@ rstx doc/en/release-notes/index.txt: $(NEWS_FILES) tools/generate_release_notes.py $(PYTHON) tools/generate_release_notes.py $@ $(NEWS_FILES) doc/%/Makefile: doc/en/Makefile $(PYTHON) -c "import shutil; shutil.copyfile('$<', '$@')" doc/%/make.bat: doc/en/make.bat $(PYTHON) -c "import shutil; shutil.copyfile('$<', '$@')" # Build the html docs using Sphinx. html-sphinx: $(SPHINX_DEPENDENCIES) cd doc/en && make html cd doc/es && make html cd doc/ru && make html cd doc/ja && make html cd doc/developers && make html # Build the PDF docs using Sphinx. This requires numerous LaTeX # packages. See http://sphinx.pocoo.org/builders.html for details. # Note: We don't currently build PDFs for the Russian docs because # they require additional packages to be installed (to handle # Russian hyphenation rules, etc.) pdf-sphinx: $(SPHINX_DEPENDENCIES) cd doc/en && make latex cd doc/es && make latex cd doc/ja && make latex cd doc/developers && make latex cd doc/en/_build/latex && make all-pdf cd doc/es/_build/latex && make all-pdf cd doc/developers/_build/latex && make all-pdf # Build the CHM (Windows Help) docs using Sphinx. # Note: HtmlHelp Workshop needs to be used on the generated hhp files # to generate the final chm files. chm-sphinx: $(SPHINX_DEPENDENCIES) cd doc/en && make htmlhelp cd doc/es && make htmlhelp cd doc/ru && make htmlhelp cd doc/ja && make htmlhelp cd doc/developers && make htmlhelp # Build the texinfo files using Sphinx. texinfo-sphinx: $(SPHINX_DEPENDENCIES) cd doc/en && make texinfo cd doc/es && make texinfo cd doc/ru && make texinfo cd doc/ja && make texinfo cd doc/developers && make texinfo ### Documentation Website ### # Where to build the website DOC_WEBSITE_BUILD = build_doc_website # Build and package docs into a website, complete with downloads. doc-website: html-sphinx pdf-sphinx $(PYTHON) tools/package_docs.py doc/en $(DOC_WEBSITE_BUILD) $(PYTHON) tools/package_docs.py doc/es $(DOC_WEBSITE_BUILD) $(PYTHON) tools/package_docs.py doc/ru $(DOC_WEBSITE_BUILD) $(PYTHON) tools/package_docs.py doc/ja $(DOC_WEBSITE_BUILD) $(PYTHON) tools/package_docs.py doc/developers $(DOC_WEBSITE_BUILD) ### Plain Documentation ### # While Sphinx is the preferred tool for building documentation, we still # support our "plain" html documentation so that Sphinx is not a hard # dependency for packagers on older platforms. 
rst2html = $(PYTHON) tools/rst2html.py --link-stylesheet --footnote-references=superscript --halt=warning # translate txt docs to html derived_txt_files = \ doc/en/release-notes/NEWS.txt txt_all = \ doc/en/tutorials/tutorial.txt \ doc/en/tutorials/using_bazaar_with_launchpad.txt \ doc/en/tutorials/centralized_workflow.txt \ $(wildcard doc/es/tutorials/*.txt) \ $(wildcard doc/ru/tutorials/*.txt) \ doc/ja/tutorials/tutorial.txt \ doc/ja/tutorials/using_bazaar_with_launchpad.txt \ doc/ja/tutorials/centralized_workflow.txt \ $(wildcard doc/*/mini-tutorial/index.txt) \ $(wildcard doc/*/user-guide/index-plain.txt) \ doc/en/admin-guide/index-plain.txt \ $(wildcard doc/es/guia-usario/*.txt) \ $(derived_txt_files) \ doc/en/upgrade-guide/index.txt \ doc/index.txt \ $(wildcard doc/index.*.txt) txt_nohtml = \ doc/en/user-guide/index.txt \ doc/es/user-guide/index.txt \ doc/ja/user-guide/index.txt \ doc/ru/user-guide/index.txt \ doc/en/admin-guide/index.txt txt_files = $(filter-out $(txt_nohtml), $(txt_all)) htm_files = $(patsubst %.txt, %.html, $(txt_files)) non_txt_files = \ doc/default.css \ $(wildcard doc/*/bzr-en-quick-reference.svg) \ $(wildcard doc/*/bzr-en-quick-reference.png) \ $(wildcard doc/*/bzr-en-quick-reference.pdf) \ $(wildcard doc/*/bzr-es-quick-reference.svg) \ $(wildcard doc/*/bzr-es-quick-reference.png) \ $(wildcard doc/*/bzr-es-quick-reference.pdf) \ $(wildcard doc/*/bzr-ru-quick-reference.svg) \ $(wildcard doc/*/bzr-ru-quick-reference.png) \ $(wildcard doc/*/bzr-ru-quick-reference.pdf) \ $(wildcard doc/*/user-guide/images/*.png) # doc/developers/*.txt files that should *not* be individually # converted to HTML dev_txt_nohtml = \ doc/developers/add.txt \ doc/developers/annotate.txt \ doc/developers/bundle-creation.txt \ doc/developers/commit.txt \ doc/developers/diff.txt \ doc/developers/directory-fingerprints.txt \ doc/developers/gc.txt \ doc/developers/implementation-notes.txt \ doc/developers/incremental-push-pull.txt \ doc/developers/index.txt \ doc/developers/initial-push-pull.txt \ doc/developers/merge-scaling.txt \ doc/developers/miscellaneous-notes.txt \ doc/developers/missing.txt \ doc/developers/performance-roadmap-rationale.txt \ doc/developers/performance-use-case-analysis.txt \ doc/developers/planned-change-integration.txt \ doc/developers/planned-performance-changes.txt \ doc/developers/plans.txt \ doc/developers/process.txt \ doc/developers/revert.txt \ doc/developers/specifications.txt \ doc/developers/status.txt \ doc/developers/uncommit.txt dev_txt_all = $(wildcard $(addsuffix /*.txt, doc/developers)) dev_txt_files = $(filter-out $(dev_txt_nohtml), $(dev_txt_all)) dev_htm_files = $(patsubst %.txt, %.html, $(dev_txt_files)) doc/en/user-guide/index-plain.html: $(wildcard $(addsuffix /*.txt, doc/en/user-guide)) $(rst2html) --stylesheet=../../default.css $(dir $@)index-plain.txt $@ #doc/es/user-guide/index.html: $(wildcard $(addsuffix /*.txt, doc/es/user-guide)) # $(rst2html) --stylesheet=../../default.css $(dir $@)index.txt $@ # #doc/ru/user-guide/index.html: $(wildcard $(addsuffix /*.txt, doc/ru/user-guide)) # $(rst2html) --stylesheet=../../default.css $(dir $@)index.txt $@ # doc/en/admin-guide/index-plain.html: $(wildcard $(addsuffix /*.txt, doc/en/admin-guide)) $(rst2html) --stylesheet=../../default.css $(dir $@)index-plain.txt $@ doc/developers/%.html: doc/developers/%.txt $(rst2html) --stylesheet=../default.css $< $@ doc/index.html: doc/index.txt $(rst2html) --stylesheet=default.css $< $@ doc/index.%.html: doc/index.%.txt $(rst2html) --stylesheet=default.css $< 
$@ %.html: %.txt $(rst2html) --stylesheet=../../default.css $< "$@" doc/en/release-notes/NEWS.txt: $(NEWS_FILES) tools/generate_release_notes.py $(PYTHON) tools/generate_release_notes.py "$@" $(NEWS_FILES) upgrade_guide_dependencies = $(wildcard $(addsuffix /*.txt, doc/en/upgrade-guide)) doc/en/upgrade-guide/index.html: $(upgrade_guide_dependencies) $(rst2html) --stylesheet=../../default.css $(dir $@)index.txt $@ derived_web_docs = $(htm_files) $(dev_htm_files) WEB_DOCS = $(derived_web_docs) $(non_txt_files) ALL_DOCS = $(derived_web_docs) $(MAN_PAGES) # the main target to build all the docs docs-plain: $(ALL_DOCS) # produce a tree containing just the final docs, ready for uploading to the web HTMLDIR = html_docs html-plain: docs-plain $(PYTHON) tools/win32/ostools.py copytree $(WEB_DOCS) $(HTMLDIR) # clean produced docs clean-plain: $(PYTHON) tools/win32/ostools.py remove $(ALL_DOCS) \ $(HTMLDIR) $(derived_txt_files) ### Miscellaneous Documentation Targets ### # build a png of our performance task list # this is no longer built by default; you can build it if you want to look at it doc/developers/performance.png: doc/developers/performance.dot @echo Generating $@ @dot -Tpng $< -o$@ || echo "Dot not installed; skipping generation of $@" ### Windows Support ### # make all the installers completely from scratch, using zc.buildout # to fetch the dependencies # These are files that need to be copied into the build location to boostrap # the build process. # Note that the path is relative to tools/win32 BUILDOUT_FILES = buildout.cfg \ buildout-templates/bin/build-installer.bat.in \ ostools.py bootstrap.py installer-all: @echo Make all the installers from scratch @# Build everything in a separate directory, to avoid cluttering the WT $(PYTHON) tools/win32/ostools.py makedir build-win32 @# cd to tools/win32 so that the relative paths are copied correctly cd tools/win32 && $(PYTHON) ostools.py copytree $(BUILDOUT_FILES) ../../build-win32 @# There seems to be a bug in gf.release.bzr, It doesn't correctly update @# existing release directories, so delete them manually before building @# It means things may be rebuilt that don't need to be, but at least @# it will be correct when they do. cd build-win32 && $(PYTHON) ostools.py remove release */release cd build-win32 && $(PYTHON) bootstrap.py cd build-win32 && bin/buildout cd build-win32 && bin/build-installer.bat $(BZR_TARGET) $(PLUGIN_TARGET) clean-installer-all: $(PYTHON) tools/win32/ostools.py remove build-win32 # make bzr.exe for win32 with py2exe exe: @echo *** Make bzr.exe $(PYTHON) tools/win32/ostools.py remove bzrlib/*.pyd $(PYTHON) setup.py build_ext -i -f $(PYTHON_BUILDFLAGS) $(PYTHON) setup.py py2exe > py2exe.log $(PYTHON) tools/win32/ostools.py copytodir tools/win32/start_bzr.bat win32_bzr.exe $(PYTHON) tools/win32/ostools.py copytodir tools/win32/bazaar.url win32_bzr.exe # win32 installer for bzr.exe installer: exe copy-docs @echo *** Make Windows installer $(PYTHON) tools/win32/run_script.py cog.py -d -o tools/win32/bzr.iss tools/win32/bzr.iss.cog iscc /Q tools/win32/bzr.iss # win32 Python's distutils-based installer # require to have Python interpreter installed on win32 py-inst-24: docs $(PYTHON24) setup.py bdist_wininst --install-script="bzr-win32-bdist-postinstall.py" -d . py-inst-25: docs $(PYTHON25) setup.py bdist_wininst --install-script="bzr-win32-bdist-postinstall.py" -d . py-inst-26: docs $(PYTHON26) setup.py bdist_wininst --install-script="bzr-win32-bdist-postinstall.py" -d . 
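The plain-documentation rules above all funnel through the same tools/rst2html.py invocation. As a rough illustration only (this snippet is not part of the tree), the equivalent call from Python looks like the sketch below; the input and output paths are just examples, and it assumes the command is run from the top of the source tree, as the Makefile rules are.

import subprocess
import sys

def build_plain_html(txt_path, html_path, stylesheet="../../default.css"):
    """Convert one reST .txt file to HTML, mirroring the %.html: %.txt rule."""
    subprocess.check_call([
        sys.executable, "tools/rst2html.py",
        "--link-stylesheet",
        "--footnote-references=superscript",
        "--halt=warning",
        "--stylesheet=%s" % stylesheet,
        txt_path, html_path,
    ])

# For example, the top-level index uses the stylesheet that sits next to it.
build_plain_html("doc/index.txt", "doc/index.html", stylesheet="default.css")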
python-installer: py-inst-24 py-inst-25 py-inst-26 copy-docs: docs $(PYTHON) tools/win32/ostools.py copytodir README win32_bzr.exe/doc $(PYTHON) tools/win32/ostools.py copytree $(WEB_DOCS) win32_bzr.exe # clean on win32 all installer-related files and directories clean-win32: clean-docs $(PYTHON) tools/win32/ostools.py remove build $(PYTHON) tools/win32/ostools.py remove win32_bzr.exe $(PYTHON) tools/win32/ostools.py remove py2exe.log $(PYTHON) tools/win32/ostools.py remove tools/win32/bzr.iss $(PYTHON) tools/win32/ostools.py remove bzr-setup*.exe $(PYTHON) tools/win32/ostools.py remove bzr-*win32.exe $(PYTHON) tools/win32/ostools.py remove dist # i18n targets .PHONY: update-pot po/bzr.pot update-pot: po/bzr.pot TRANSLATABLE_PYFILES:=$(shell find bzrlib -name '*.py' \ | grep -v 'bzrlib/tests/' \ | grep -v 'bzrlib/doc' \ ) po/bzr.pot: $(PYFILES) $(DOCFILES) $(PYTHON) ./bzr export-pot --include-duplicates > po/bzr.pot echo $(TRANSLATABLE_PYFILES) | xargs \ xgettext --package-name "bzr" \ --msgid-bugs-address "" \ --copyright-holder "Canonical" \ --from-code ISO-8859-1 --join --sort-by-file --add-comments=i18n: \ -d bzr -p po -o bzr.pot ### Packaging Targets ### .PHONY: dist check-dist-tarball # build a distribution source tarball # # this method of copying the pyrex generated files is a bit ugly; it would be # nicer to generate it from distutils. dist: version=`./bzr version --short` && \ echo Building distribution of bzr $$version && \ expbasedir=`mktemp -t -d tmp_bzr_dist.XXXXXXXXXX` && \ expdir=$$expbasedir/bzr-$$version && \ tarball=$$PWD/../bzr-$$version.tar.gz && \ $(MAKE) clean && \ $(MAKE) && \ bzr export $$expdir && \ cp bzrlib/*.c bzrlib/*.h $$expdir/bzrlib/. && \ tar cfz $$tarball -C $$expbasedir bzr-$$version && \ gpg --detach-sign $$tarball && \ rm -rf $$expbasedir # run all tests in a previously built tarball check-dist-tarball: tmpdir=`mktemp -t -d tmp_bzr_check_dist.XXXXXXXXXX` && \ version=`./bzr version --short` && \ tarball=$$PWD/../bzr-$$version.tar.gz && \ tar Cxz $$tmpdir -f $$tarball && \ $(MAKE) -C $$tmpdir/bzr-$$version check && \ rm -rf $$tmpdir
bzr-2.7.0/NEWS0000644000000000000000000000021511453551422011142 0ustar 00000000000000The NEWS file has been moved and split into multiple files (one per release series). The NEWS files are now found in doc/en/release-notes/.
bzr-2.7.0/README0000644000000000000000000000445311453261701011331 0ustar 00000000000000================= README for Bazaar ================= Bazaar (``bzr``) is a decentralized revision control system, designed to be easy for developers and end users alike. Bazaar is part of the GNU project to develop a complete free operating system, and a project of Canonical. Ready-to-install packages are available for most popular operating systems, or you can install from source by following the instructions in the INSTALL file. To learn how to use Bazaar, see the official documentation in the `doc` directory or online. For additional training materials including screencasts and slides, visit our community wiki documentation page at: http://wiki.bazaar.canonical.com/Documentation/ Bazaar is Free Software, and is released under the GNU General Public License, version 2 or later. Bazaar highlights ================= Bazaar directly supports both central version control (like cvs/svn) and distributed version control (like git/hg). Developers can organize their workspace in whichever way they want on a per-project basis, including: * checkouts (like svn) * feature branches (like hg) * shared working tree (like git).
It also directly supports and encourages a large number of development best practices like refactoring and pre-commit regression testing. Users can choose between our command line tool and our cross-platform GUI application. For further details, see our website at http://bazaar.canonical.com/en/ Feedback ======== If you encounter any problems with Bazaar, need help understanding it, or would like to offer suggestions or feedback, please get in touch with us: * Ask a question through our web support interface, at https://answers.launchpad.net/bzr/ * Report bugs at https://bugs.launchpad.net/bzr/+filebug * Write to us at bazaar@lists.canonical.com You don't need to subscribe to the list to post, but your first post will be held briefly for manual moderation. * Talk to us in irc://irc.ubuntu.com/bzr Our mission is to make a version control tool that developers LOVE to use and that casual contributors feel confident with. Please let us know how we're going. The Bazaar Team
bzr-2.7.0/README_BDIST_RPM0000644000000000000000000000061511322235233012764 0ustar 00000000000000There is a bug in distutils for distributions whose rpmbuild compresses the man pages. This causes an error when building the final packages: the build expects bzr.1 and does not find it, but instead finds an unpackaged bzr.1.gz. This bug is known to affect Fedora, RHEL, and CentOS distributions. There is a preliminary patch at http://bugs.python.org/issue644744 that fixes this issue in distutils.
bzr-2.7.0/TODO0000644000000000000000000000016311345540263011136 0ustar 00000000000000For things to do in Bazaar development, see https://bugs.launchpad.net/bzr/ https://blueprints.launchpad.net/bzr/
bzr-2.7.0/apport/0000755000000000000000000000000011357256273011763 5ustar 00000000000000
bzr-2.7.0/bzr0000755000000000000000000001065712174741310011177 0ustar 00000000000000#! /usr/bin/env python # Copyright (C) 2005-2012 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import """Bazaar -- a free distributed version-control tool""" import os import sys import warnings # update this on each release _script_version = (2, 7, 0) NEED_VERS = (2, 6) if sys.version_info < NEED_VERS: sys.stderr.write("bzr: error: cannot find a suitable python interpreter\n") sys.stderr.write(" (need %d.%d or later)\n" % NEED_VERS) sys.exit(1) profiling = False if '--profile-imports' in sys.argv: import profile_imports profile_imports.install() profiling = True if os.name == "posix": import locale try: locale.setlocale(locale.LC_ALL, '') except locale.Error, e: sys.stderr.write('bzr: warning: %s\n' ' bzr could not set the application locale.\n' ' Although this should be no problem for bzr itself, it might\n' ' cause problems with some plugins.
To investigate the issue,\n' ' look at the output of the locale(1p) tool.\n' % e) # Use better default than ascii with posix filesystems that deal in bytes # natively even when the C locale or no locale at all is given. Note that # we need an immortal string for the hack, hence the lack of a hyphen. sys._bzr_default_fs_enc = "utf8" # The python2.6 release includes some libraries that have deprecation warnings # against the interpreter - see https://bugs.launchpad.net/bzr/+bug/387139 warnings.filterwarnings('ignore', r"(struct integer overflow masking is deprecated|" r"'L' format requires 0 <= number <= 4294967295)", DeprecationWarning, 'gzip', ) # instruct bzrlib/__init__.py to install lazy_regex sys._bzr_lazy_regex = True try: import bzrlib except ImportError, e: sys.stderr.write("bzr: ERROR: " "Couldn't import bzrlib and dependencies.\n" "Please check the directory containing bzrlib is on your PYTHONPATH.\n" "\n") raise if bzrlib.version_info[:3] != _script_version: sys.stderr.write( "bzr: WARNING: bzrlib version doesn't match the bzr program.\n" "This may indicate an installation problem.\n" "bzrlib is version %s from %s\n" "bzr is version %s from %s\n" % ( bzrlib._format_version_tuple(bzrlib.version_info), bzrlib.__path__[0], bzrlib._format_version_tuple(_script_version), __file__)) import bzrlib.inspect_for_copy bzrlib.inspect_for_copy.import_copy_with_hacked_inspect() import bzrlib.breakin bzrlib.breakin.hook_debugger_to_signal() import bzrlib.decorators if ('--lsprof' in sys.argv or '--lsprof-file' in sys.argv or '--profile' in sys.argv or '--lsprof-timed' in sys.argv): bzrlib.decorators.use_pretty_decorators() else: bzrlib.decorators.use_fast_decorators() import bzrlib.commands import bzrlib.trace if __name__ == '__main__': library_state = bzrlib.initialize() library_state.__enter__() try: exit_val = bzrlib.commands.main() if profiling: profile_imports.log_stack_info(sys.stderr) finally: library_state.__exit__(None, None, None) # By this point we really have completed everything we want to do, and # there's no point doing any additional cleanup. Abruptly exiting here # stops any background threads getting into trouble as code is unloaded, # and it may also be slightly faster, through avoiding gc of objects that # are just about to be discarded anyhow. This does mean that atexit hooks # won't run but we don't use them. Also file buffers won't be flushed, # but our policy is to always close files from a finally block. 
-- mbp 20070215 sys.exitfunc() os._exit(exit_val) else: raise ImportError("The bzr script cannot be imported.")
bzr-2.7.0/bzr.ico0000644000000000000000000003144611061526035011743 0ustar 00000000000000[binary Windows icon data omitted]
bzr-2.7.0/profile_imports.py # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details.
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """A custom importer and regex compiler which logs time spent.""" import sys import time import re _parent_stack = [] _total_stack = {} _info = {} _cur_id = 0 _timer = time.time if sys.platform == 'win32': _timer = time.clock def stack_add(name, frame_name, frame_lineno, scope_name=None): """Start a new record on the stack""" global _cur_id _cur_id += 1 this_stack = (_cur_id, name) if _parent_stack: _total_stack[_parent_stack[-1]].append(this_stack) _total_stack[this_stack] = [] _parent_stack.append(this_stack) _info[this_stack] = [len(_parent_stack)-1, frame_name, frame_lineno, scope_name] return this_stack def stack_finish(this, cost): """Finish a given entry, and record its cost in time""" global _parent_stack assert _parent_stack[-1] == this, \ 'import stack does not end with this %s: %s' % (this, _parent_stack) _parent_stack.pop() _info[this].append(cost) def log_stack_info(out_file, sorted=True, hide_fast=True): # Find all of the roots with import = 0 out_file.write('%5s %5s %-40s @ %s:%s\n' % ('cum', 'inline', 'name', 'file', 'line')) todo = [(value[-1], key) for key,value in _info.iteritems() if value[0] == 0] if sorted: todo.sort() while todo: cum_time, cur = todo.pop() children = _total_stack[cur] c_times = [] info = _info[cur] if hide_fast and info[-1] < 0.0001: continue # Compute the module time by removing the children times mod_time = info[-1] for child in children: c_info = _info[child] mod_time -= c_info[-1] c_times.append((c_info[-1], child)) # indent, cum_time, mod_time, name, # scope_name, frame_name, frame_lineno out_file.write('%5.1f %5.1f %-40s @ %s:%d\n' % (info[-1]*1000., mod_time*1000., ('+'*info[0] + cur[1]), info[1], info[2])) if sorted: c_times.sort() else: c_times.reverse() todo.extend(c_times) _real_import = __import__ def timed_import(name, globals=None, locals=None, fromlist=None, level=None): """Wrap around standard importer to log import time""" # normally there are 4, but if this is called as __import__ eg by # /usr/lib/python2.6/email/__init__.py then there may be only one # parameter # level is only passed by python2.6 if globals is None: # can't determine the scope name afaics; we could peek up the stack to # see where this is being called from, but it should be a rare case. 
scope_name = None else: scope_name = globals.get('__name__', None) if scope_name is None: scope_name = globals.get('__file__', None) if scope_name is None: scope_name = globals.keys() else: # Trim out paths before bzrlib loc = scope_name.find('bzrlib') if loc != -1: scope_name = scope_name[loc:] # For stdlib, trim out early paths loc = scope_name.find('python2.4') if loc != -1: scope_name = scope_name[loc:] # Figure out the frame that is doing the importing frame = sys._getframe(1) frame_name = frame.f_globals.get('__name__', '') extra = '' if frame_name.endswith('demandload'): # If this was demandloaded, we have 3 frames to ignore extra = '(demandload) ' frame = sys._getframe(4) frame_name = frame.f_globals.get('__name__', '') elif frame_name.endswith('lazy_import'): # If this was lazily imported, we have 3 frames to ignore extra = '[l] ' frame = sys._getframe(4) frame_name = frame.f_globals.get('__name__', '') if fromlist: extra += ' [%s]' % (', '.join(map(str, fromlist)),) frame_lineno = frame.f_lineno this = stack_add(extra + name, frame_name, frame_lineno, scope_name) tstart = _timer() try: # Do the import mod = _real_import(name, globals, locals, fromlist) finally: tload = _timer()-tstart stack_finish(this, tload) return mod _real_compile = re._compile def timed_compile(*args, **kwargs): """Log how long it takes to compile a regex""" # And who is requesting this? frame = sys._getframe(2) frame_name = frame.f_globals.get('__name__', '') extra = '' if frame_name.endswith('lazy_regex'): # If this was lazily compiled, we have 3 more frames to ignore extra = '[l] ' frame = sys._getframe(5) frame_name = frame.f_globals.get('__name__', '') frame_lineno = frame.f_lineno this = stack_add(extra+repr(args[0]), frame_name, frame_lineno) tstart = _timer() try: # Measure the compile time comp = _real_compile(*args, **kwargs) finally: tcompile = _timer() - tstart stack_finish(this, tcompile) return comp def install(): """Install the hooks for measuring import and regex compile time.""" __builtins__['__import__'] = timed_import re._compile = timed_compile def uninstall(): """Remove the import and regex compile timing hooks.""" __builtins__['__import__'] = _real_import re._compile = _real_compile bzr-2.7.0/setup.py0000755000000000000000000007320212316055601012163 0ustar 00000000000000#! /usr/bin/env python """Installation script for bzr. Run it with './setup.py install', or './setup.py --help' for more options """ import os import os.path import sys import copy import glob if sys.version_info < (2, 6): sys.stderr.write("[ERROR] Not a supported Python version. Need 2.6+\n") sys.exit(1) # NOTE: The directory containing setup.py, whether run by 'python setup.py' or # './setup.py' or the equivalent with another path, should always be at the # start of the path, so this should find the right one... 
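A minimal usage sketch for the profile_imports module listed above (the snippet itself is not part of the tree): it follows the same install()/log_stack_info() pattern that the bzr front-end script uses for its --profile-imports option, and it assumes it is run from a source tree where profile_imports.py and bzrlib are importable.

import sys

import profile_imports

profile_imports.install()                    # hook __import__ and re._compile
import bzrlib                                # imports from here on are timed
profile_imports.uninstall()                  # restore the real hooks
profile_imports.log_stack_info(sys.stderr)   # dump cumulative and inline times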
import bzrlib def get_long_description(): dirname = os.path.dirname(__file__) readme = os.path.join(dirname, 'README') f = open(readme, 'rb') try: return f.read() finally: f.close() ## # META INFORMATION FOR SETUP # see http://docs.python.org/dist/meta-data.html META_INFO = { 'name': 'bzr', 'version': bzrlib.__version__, 'author': 'Canonical Ltd', 'author_email': 'bazaar@lists.canonical.com', 'url': 'http://bazaar.canonical.com/', 'description': 'Friendly distributed version control system', 'license': 'GNU GPL v2', 'download_url': 'https://launchpad.net/bzr/+download', 'long_description': get_long_description(), 'classifiers': [ 'Development Status :: 6 - Mature', 'Environment :: Console', 'Intended Audience :: Developers', 'Intended Audience :: System Administrators', 'License :: OSI Approved :: GNU General Public License (GPL)', 'Operating System :: Microsoft :: Windows', 'Operating System :: OS Independent', 'Operating System :: POSIX', 'Programming Language :: Python', 'Programming Language :: C', 'Topic :: Software Development :: Version Control', ], } # The list of packages is automatically generated later. Add other things # that are part of BZRLIB here. BZRLIB = {} PKG_DATA = {# install files from selftest suite 'package_data': {'bzrlib': ['doc/api/*.txt', 'tests/test_patches_data/*', 'help_topics/en/*.txt', 'tests/ssl_certs/ca.crt', 'tests/ssl_certs/server_without_pass.key', 'tests/ssl_certs/server_with_pass.key', 'tests/ssl_certs/server.crt', ]}, } I18N_FILES = [] for filepath in glob.glob("bzrlib/locale/*/LC_MESSAGES/*.mo"): langfile = filepath[len("bzrlib/locale/"):] targetpath = os.path.dirname(os.path.join("share/locale", langfile)) I18N_FILES.append((targetpath, [filepath])) def get_bzrlib_packages(): """Recurse through the bzrlib directory, and extract the package names""" packages = [] base_path = os.path.dirname(os.path.abspath(bzrlib.__file__)) for root, dirs, files in os.walk(base_path): if '__init__.py' in files: assert root.startswith(base_path) # Get just the path below bzrlib package_path = root[len(base_path):] # Remove leading and trailing slashes package_path = package_path.strip('\\/') if not package_path: package_name = 'bzrlib' else: package_name = ('bzrlib.' + package_path.replace('/', '.').replace('\\', '.')) packages.append(package_name) return sorted(packages) BZRLIB['packages'] = get_bzrlib_packages() from distutils import log from distutils.core import setup from distutils.version import LooseVersion from distutils.command.install_scripts import install_scripts from distutils.command.install_data import install_data from distutils.command.build import build ############################### # Overridden distutils actions ############################### class my_install_scripts(install_scripts): """ Customized install_scripts distutils action. Create bzr.bat for win32. 
""" def run(self): install_scripts.run(self) # standard action if sys.platform == "win32": try: scripts_dir = os.path.join(sys.prefix, 'Scripts') script_path = self._quoted_path(os.path.join(scripts_dir, "bzr")) python_exe = self._quoted_path(sys.executable) args = self._win_batch_args() batch_str = "@%s %s %s" % (python_exe, script_path, args) batch_path = os.path.join(self.install_dir, "bzr.bat") f = file(batch_path, "w") f.write(batch_str) f.close() print("Created: %s" % batch_path) except Exception: e = sys.exc_info()[1] print("ERROR: Unable to create %s: %s" % (batch_path, e)) def _quoted_path(self, path): if ' ' in path: return '"' + path + '"' else: return path def _win_batch_args(self): from bzrlib.win32utils import winver if winver == 'Windows NT': return '%*' else: return '%1 %2 %3 %4 %5 %6 %7 %8 %9' #/class my_install_scripts class bzr_build(build): """Customized build distutils action. Generate bzr.1. """ sub_commands = build.sub_commands + [ ('build_mo', lambda _: True), ] def run(self): build.run(self) from tools import generate_docs generate_docs.main(argv=["bzr", "man"]) ######################## ## Setup ######################## from bzrlib.bzr_distutils import build_mo command_classes = {'install_scripts': my_install_scripts, 'build': bzr_build, 'build_mo': build_mo, } from distutils import log from distutils.errors import CCompilerError, DistutilsPlatformError from distutils.extension import Extension ext_modules = [] try: try: from Cython.Distutils import build_ext from Cython.Compiler.Version import version as pyrex_version except ImportError: print("No Cython, trying Pyrex...") from Pyrex.Distutils import build_ext from Pyrex.Compiler.Version import version as pyrex_version except ImportError: have_pyrex = False # try to build the extension from the prior generated source. print("") print("The python package 'Pyrex' is not available." " If the .c files are available,") print("they will be built," " but modifying the .pyx files will not rebuild them.") print("") from distutils.command.build_ext import build_ext else: have_pyrex = True pyrex_version_info = LooseVersion(pyrex_version) class build_ext_if_possible(build_ext): user_options = build_ext.user_options + [ ('allow-python-fallback', None, "When an extension cannot be built, allow falling" " back to the pure-python implementation.") ] def initialize_options(self): build_ext.initialize_options(self) self.allow_python_fallback = False def run(self): try: build_ext.run(self) except DistutilsPlatformError: e = sys.exc_info()[1] if not self.allow_python_fallback: log.warn('\n Cannot build extensions.\n' ' Use "build_ext --allow-python-fallback" to use' ' slower python implementations instead.\n') raise log.warn(str(e)) log.warn('\n Extensions cannot be built.\n' ' Using the slower Python implementations instead.\n') def build_extension(self, ext): try: build_ext.build_extension(self, ext) except CCompilerError: if not self.allow_python_fallback: log.warn('\n Cannot build extension "%s".\n' ' Use "build_ext --allow-python-fallback" to use' ' slower python implementations instead.\n' % (ext.name,)) raise log.warn('\n Building of "%s" extension failed.\n' ' Using the slower Python implementation instead.' % (ext.name,)) # Override the build_ext if we have Pyrex available command_classes['build_ext'] = build_ext_if_possible unavailable_files = [] def add_pyrex_extension(module_name, libraries=None, extra_source=[]): """Add a pyrex module to build. This will use Pyrex to auto-generate the .c file if it is available. 
Otherwise it will fall back on the .c file. If the .c file is not available, it will warn, and not add anything. You can pass any extra options to Extension through kwargs. One example is 'libraries = []'. :param module_name: The python path to the module. This will be used to determine the .pyx and .c files to use. """ path = module_name.replace('.', '/') pyrex_name = path + '.pyx' c_name = path + '.c' define_macros = [] if sys.platform == 'win32': # pyrex uses the macro WIN32 to detect the platform, even though it # should be using something like _WIN32 or MS_WINDOWS, oh well, we can # give it the right value. define_macros.append(('WIN32', None)) if have_pyrex: source = [pyrex_name] else: if not os.path.isfile(c_name): unavailable_files.append(c_name) return else: source = [c_name] source.extend(extra_source) ext_modules.append(Extension(module_name, source, define_macros=define_macros, libraries=libraries)) add_pyrex_extension('bzrlib._annotator_pyx') add_pyrex_extension('bzrlib._bencode_pyx') add_pyrex_extension('bzrlib._chunks_to_lines_pyx') add_pyrex_extension('bzrlib._groupcompress_pyx', extra_source=['bzrlib/diff-delta.c']) add_pyrex_extension('bzrlib._knit_load_data_pyx') add_pyrex_extension('bzrlib._known_graph_pyx') add_pyrex_extension('bzrlib._rio_pyx') if sys.platform == 'win32': add_pyrex_extension('bzrlib._dirstate_helpers_pyx', libraries=['Ws2_32']) add_pyrex_extension('bzrlib._walkdirs_win32') else: if have_pyrex and pyrex_version_info == LooseVersion("0.9.4.1"): # Pyrex 0.9.4.1 fails to compile this extension correctly # The code it generates re-uses a "local" pointer and # calls "PY_DECREF" after having set it to NULL. (It mixes PY_XDECREF # which is NULL safe with PY_DECREF which is not.) # # print('Cannot build extension "bzrlib._dirstate_helpers_pyx" using') print('your version of pyrex "%s". Please upgrade your pyrex' % (pyrex_version,)) print('install. For now, the non-compiled (python) version will') print('be used instead.') else: add_pyrex_extension('bzrlib._dirstate_helpers_pyx') add_pyrex_extension('bzrlib._readdir_pyx') add_pyrex_extension('bzrlib._chk_map_pyx') ext_modules.append(Extension('bzrlib._patiencediff_c', ['bzrlib/_patiencediff_c.c'])) if have_pyrex and pyrex_version_info < LooseVersion("0.9.6.3"): print("") print('Your Pyrex/Cython version %s is too old to build the simple_set' % ( pyrex_version)) print('and static_tuple extensions.') print('Please upgrade to at least Pyrex 0.9.6.3') print("") # TODO: Should this be a fatal error? else: # We only need 0.9.6.3 to build _simple_set_pyx, but static_tuple depends # on simple_set add_pyrex_extension('bzrlib._simple_set_pyx') ext_modules.append(Extension('bzrlib._static_tuple_c', ['bzrlib/_static_tuple_c.c'])) add_pyrex_extension('bzrlib._btree_serializer_pyx') if unavailable_files: print('C extension(s) not found:') print(' %s' % ('\n '.join(unavailable_files),)) print('The python versions will be used instead.') print("") def get_tbzr_py2exe_info(includes, excludes, packages, console_targets, gui_targets, data_files): packages.append('tbzrcommands') # ModuleFinder can't handle runtime changes to __path__, but # win32com uses them. Hook this in so win32com.shell is found. 
import modulefinder import win32com import cPickle as pickle for p in win32com.__path__[1:]: modulefinder.AddPackagePath("win32com", p) for extra in ["win32com.shell"]: __import__(extra) m = sys.modules[extra] for p in m.__path__[1:]: modulefinder.AddPackagePath(extra, p) # TBZR points to the TBZR directory tbzr_root = os.environ["TBZR"] # Ensure tbzrlib itself is on sys.path sys.path.append(tbzr_root) packages.append("tbzrlib") # collect up our icons. cwd = os.getcwd() ico_root = os.path.join(tbzr_root, 'tbzrlib', 'resources') icos = [] # list of (path_root, relative_ico_path) # First always bzr's icon and its in the root of the bzr tree. icos.append(('', 'bzr.ico')) for root, dirs, files in os.walk(ico_root): icos.extend([(ico_root, os.path.join(root, f)[len(ico_root)+1:]) for f in files if f.endswith('.ico')]) # allocate an icon ID for each file and the full path to the ico icon_resources = [(rid, os.path.join(ico_dir, ico_name)) for rid, (ico_dir, ico_name) in enumerate(icos)] # create a string resource with the mapping. Might as well save the # runtime some effort and write a pickle. # Runtime expects unicode objects with forward-slash seps. fse = sys.getfilesystemencoding() map_items = [(f.replace('\\', '/').decode(fse), rid) for rid, (_, f) in enumerate(icos)] ico_map = dict(map_items) # Create a new resource type of 'ICON_MAP', and use ID=1 other_resources = [ ("ICON_MAP", 1, pickle.dumps(ico_map))] excludes.extend("""pywin pywin.dialogs pywin.dialogs.list win32ui crawler.Crawler""".split()) # tbzrcache executables - a "console" version for debugging and a # GUI version that is generally used. tbzrcache = dict( script = os.path.join(tbzr_root, "scripts", "tbzrcache.py"), icon_resources = icon_resources, other_resources = other_resources, ) console_targets.append(tbzrcache) # Make a windows version which is the same except for the base name. tbzrcachew = tbzrcache.copy() tbzrcachew["dest_base"]="tbzrcachew" gui_targets.append(tbzrcachew) # ditto for the tbzrcommand tool tbzrcommand = dict( script = os.path.join(tbzr_root, "scripts", "tbzrcommand.py"), icon_resources = icon_resources, other_resources = other_resources, ) console_targets.append(tbzrcommand) tbzrcommandw = tbzrcommand.copy() tbzrcommandw["dest_base"]="tbzrcommandw" gui_targets.append(tbzrcommandw) # A utility to see python output from both C++ and Python based shell # extensions tracer = dict(script=os.path.join(tbzr_root, "scripts", "tbzrtrace.py")) console_targets.append(tracer) # The C++ implemented shell extensions. dist_dir = os.path.join(tbzr_root, "shellext", "build") data_files.append(('', [os.path.join(dist_dir, 'tbzrshellext_x86.dll')])) data_files.append(('', [os.path.join(dist_dir, 'tbzrshellext_x64.dll')])) def get_qbzr_py2exe_info(includes, excludes, packages, data_files): # PyQt4 itself still escapes the plugin detection code for some reason... includes.append('PyQt4.QtCore') includes.append('PyQt4.QtGui') includes.append('PyQt4.QtTest') includes.append('sip') # extension module required for Qt. packages.append('pygments') # colorizer for qbzr packages.append('docutils') # html formatting includes.append('win32event') # for qsubprocess stuff # the qt binaries might not be on PATH... 
# They seem to install to a place like C:\Python25\PyQt4\* # Which is not the same as C:\Python25\Lib\site-packages\PyQt4 pyqt_dir = os.path.join(sys.prefix, "PyQt4") pyqt_bin_dir = os.path.join(pyqt_dir, "bin") if os.path.isdir(pyqt_bin_dir): path = os.environ.get("PATH", "") if pyqt_bin_dir.lower() not in [p.lower() for p in path.split(os.pathsep)]: os.environ["PATH"] = path + os.pathsep + pyqt_bin_dir # also add all imageformat plugins to distribution # We will look in 2 places, dirname(PyQt4.__file__) and pyqt_dir base_dirs_to_check = [] if os.path.isdir(pyqt_dir): base_dirs_to_check.append(pyqt_dir) try: import PyQt4 except ImportError: pass else: pyqt4_base_dir = os.path.dirname(PyQt4.__file__) if pyqt4_base_dir != pyqt_dir: base_dirs_to_check.append(pyqt4_base_dir) if not base_dirs_to_check: log.warn("Can't find PyQt4 installation -> not including imageformat" " plugins") else: files = [] for base_dir in base_dirs_to_check: plug_dir = os.path.join(base_dir, 'plugins', 'imageformats') if os.path.isdir(plug_dir): for fname in os.listdir(plug_dir): # Include plugin dlls, but not debugging dlls fullpath = os.path.join(plug_dir, fname) if fname.endswith('.dll') and not fname.endswith('d4.dll'): files.append(fullpath) if files: data_files.append(('imageformats', files)) else: log.warn('PyQt4 was found, but we could not find any imageformat' ' plugins. Are you sure your configuration is correct?') def get_svn_py2exe_info(includes, excludes, packages): packages.append('subvertpy') packages.append('sqlite3') def get_git_py2exe_info(includes, excludes, packages): packages.append('dulwich') def get_fastimport_py2exe_info(includes, excludes, packages): # This is the python-fastimport package, not to be confused with the # bzr-fastimport plugin. packages.append('fastimport') if 'bdist_wininst' in sys.argv: def find_docs(): docs = [] for root, dirs, files in os.walk('doc'): r = [] for f in files: if (os.path.splitext(f)[1] in ('.html','.css','.png','.pdf') or f == 'quick-start-summary.svg'): r.append(os.path.join(root, f)) if r: relative = root[4:] if relative: target = os.path.join('Doc\\Bazaar', relative) else: target = 'Doc\\Bazaar' docs.append((target, r)) return docs # python's distutils-based win32 installer ARGS = {'scripts': ['bzr', 'tools/win32/bzr-win32-bdist-postinstall.py'], 'ext_modules': ext_modules, # help pages 'data_files': find_docs(), # for building pyrex extensions 'cmdclass': command_classes, } ARGS.update(META_INFO) ARGS.update(BZRLIB) PKG_DATA['package_data']['bzrlib'].append('locale/*/LC_MESSAGES/*.mo') ARGS.update(PKG_DATA) setup(**ARGS) elif 'py2exe' in sys.argv: # py2exe setup import py2exe # pick real bzr version import bzrlib version_number = [] for i in bzrlib.version_info[:4]: try: i = int(i) except ValueError: i = 0 version_number.append(str(i)) version_str = '.'.join(version_number) # An override to install_data used only by py2exe builds, which arranges # to byte-compile any .py files in data_files (eg, our plugins) # Necessary as we can't rely on the user having the relevant permissions # to the "Program Files" directory to generate them on the fly. 
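# A minimal sketch (illustrative, not from the original setup.py) of the
# distutils helper the subclass below relies on:
#
#     from distutils.util import byte_compile
#     byte_compile(['win32_bzr.exe/plugins/qbzr/__init__.py'],
#                  optimize=1, force=True, prefix='win32_bzr.exe/')
#
# byte_compile() writes the matching .pyc/.pyo next to each listed source
# file, recording the filename with 'prefix' stripped so the path embedded
# in the bytecode matches the installed layout.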
class install_data_with_bytecompile(install_data): def run(self): from distutils.util import byte_compile install_data.run(self) py2exe = self.distribution.get_command_obj('py2exe', False) # GZ 2010-04-19: Setup has py2exe.optimize as 2, but give plugins # time before living with docstring stripping optimize = 1 compile_names = [f for f in self.outfiles if f.endswith('.py')] # Round mtime to nearest even second so that installing on a FAT # filesystem bytecode internal and script timestamps will match for f in compile_names: mtime = os.stat(f).st_mtime remainder = mtime % 2 if remainder: mtime -= remainder os.utime(f, (mtime, mtime)) byte_compile(compile_names, optimize=optimize, force=self.force, prefix=self.install_dir, dry_run=self.dry_run) self.outfiles.extend([f + 'o' for f in compile_names]) # end of class install_data_with_bytecompile target = py2exe.build_exe.Target(script = "bzr", dest_base = "bzr", icon_resources = [(0,'bzr.ico')], name = META_INFO['name'], version = version_str, description = META_INFO['description'], author = META_INFO['author'], copyright = "(c) Canonical Ltd, 2005-2010", company_name = "Canonical Ltd.", comments = META_INFO['description'], ) gui_target = copy.copy(target) gui_target.dest_base = "bzrw" packages = BZRLIB['packages'] packages.remove('bzrlib') packages = [i for i in packages if not i.startswith('bzrlib.plugins')] includes = [] for i in glob.glob('bzrlib\\*.py'): module = i[:-3].replace('\\', '.') if module.endswith('__init__'): module = module[:-len('__init__')] includes.append(module) additional_packages = set() if sys.version.startswith('2.4'): # adding elementtree package additional_packages.add('elementtree') elif sys.version.startswith('2.6') or sys.version.startswith('2.5'): additional_packages.add('xml.etree') else: import warnings warnings.warn('Unknown Python version.\n' 'Please check setup.py script for compatibility.') # Although we currently can't enforce it, we consider it an error for # py2exe to report any files are "missing". Such modules we know aren't # used should be listed here. excludes = """Tkinter psyco ElementPath r_hmac ImaginaryModule cElementTree elementtree.ElementTree Crypto.PublicKey._fastmath medusa medusa.filesys medusa.ftp_server tools resource validate""".split() dll_excludes = [] # email package from std python library use lazy import, # so we need to explicitly add all package additional_packages.add('email') # And it uses funky mappings to conver to 'Oldname' to 'newname'. As # a result, packages like 'email.Parser' show as missing. Tell py2exe # to exclude them. import email for oldname in getattr(email, '_LOWERNAMES', []): excludes.append("email." + oldname) for oldname in getattr(email, '_MIMENAMES', []): excludes.append("email.MIME" + oldname) # text files for help topis text_topics = glob.glob('bzrlib/help_topics/en/*.txt') topics_files = [('lib/help_topics/en', text_topics)] # built-in plugins plugins_files = [] # XXX - should we consider having the concept of an 'official' build, # which hard-codes the list of plugins, gets more upset if modules are # missing, etc? plugins = None # will be a set after plugin sniffing... for root, dirs, files in os.walk('bzrlib/plugins'): if root == 'bzrlib/plugins': plugins = set(dirs) # We ship plugins as normal files on the file-system - however, # the build process can cause *some* of these plugin files to end # up in library.zip. Thus, we saw (eg) "plugins/svn/test" in # library.zip, and then saw import errors related to that as the # rest of the svn plugin wasn't. 
So we tell py2exe to leave the # plugins out of the .zip file excludes.extend(["bzrlib.plugins." + d for d in dirs]) x = [] for i in files: # Throw away files we don't want packaged. Note that plugins may # have data files with all sorts of extensions so we need to # be conservative here about what we ditch. ext = os.path.splitext(i)[1] if ext.endswith('~') or ext in [".pyc", ".swp"]: continue if i == '__init__.py' and root == 'bzrlib/plugins': continue x.append(os.path.join(root, i)) if x: target_dir = root[len('bzrlib/'):] # install to 'plugins/...' plugins_files.append((target_dir, x)) # find modules for built-in plugins import tools.package_mf mf = tools.package_mf.CustomModuleFinder() mf.run_package('bzrlib/plugins') packs, mods = mf.get_result() additional_packages.update(packs) includes.extend(mods) console_targets = [target, 'tools/win32/bzr_postinstall.py', ] gui_targets = [gui_target] data_files = topics_files + plugins_files + I18N_FILES if 'qbzr' in plugins: get_qbzr_py2exe_info(includes, excludes, packages, data_files) if 'svn' in plugins: get_svn_py2exe_info(includes, excludes, packages) if 'git' in plugins: get_git_py2exe_info(includes, excludes, packages) if 'fastimport' in plugins: get_fastimport_py2exe_info(includes, excludes, packages) if "TBZR" in os.environ: # TORTOISE_OVERLAYS_MSI_WIN32 must be set to the location of the # TortoiseOverlays MSI installer file. It is in the TSVN svn repo and # can be downloaded from (username=guest, blank password): # http://tortoisesvn.tigris.org/svn/tortoisesvn/TortoiseOverlays # look for: version-1.0.4/bin/TortoiseOverlays-1.0.4.11886-win32.msi # Ditto for TORTOISE_OVERLAYS_MSI_X64, pointing at *-x64.msi. for needed in ('TORTOISE_OVERLAYS_MSI_WIN32', 'TORTOISE_OVERLAYS_MSI_X64'): url = ('http://guest:@tortoisesvn.tigris.org/svn/tortoisesvn' '/TortoiseOverlays') if not os.path.isfile(os.environ.get(needed, '')): raise RuntimeError( "\nPlease set %s to the location of the relevant" "\nTortoiseOverlays .msi installer file." " The installers can be found at" "\n %s" "\ncheck in the version-X.Y.Z/bin/ subdir" % (needed, url)) get_tbzr_py2exe_info(includes, excludes, packages, console_targets, gui_targets, data_files) else: # print this warning to stderr as output is redirected, so it is seen # at build time. Also to stdout so it appears in the log for f in (sys.stderr, sys.stdout): f.write("Skipping TBZR binaries - " "please set TBZR to a directory to enable\n") # MSWSOCK.dll is a system-specific library, which py2exe accidentally pulls # in on Vista. dll_excludes.extend(["MSWSOCK.dll", "MSVCP60.dll", "MSVCP90.dll", "powrprof.dll", "SHFOLDER.dll"]) options_list = {"py2exe": {"packages": packages + list(additional_packages), "includes": includes, "excludes": excludes, "dll_excludes": dll_excludes, "dist_dir": "win32_bzr.exe", "optimize": 2, "custom_boot_script": "tools/win32/py2exe_boot_common.py", }, } # We want the libaray.zip to have optimize = 2, but the exe to have # optimize = 1, so that .py files that get compilied at run time # (e.g. user installed plugins) dont have their doc strings removed. 
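# (Background note, not in the original file: py2exe's 'optimize' option
# mirrors Python's -O/-OO flags -- optimize=2 strips docstrings from the
# bytecode it writes, optimize=1 only drops asserts and __debug__ blocks --
# which is why the subclass below forces 1 while building the executables.)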
class py2exe_no_oo_exe(py2exe.build_exe.py2exe): def build_executable(self, *args, **kwargs): self.optimize = 1 py2exe.build_exe.py2exe.build_executable(self, *args, **kwargs) self.optimize = 2 if __name__ == '__main__': command_classes['install_data'] = install_data_with_bytecompile command_classes['py2exe'] = py2exe_no_oo_exe setup(options=options_list, console=console_targets, windows=gui_targets, zipfile='lib/library.zip', data_files=data_files, cmdclass=command_classes, ) else: # ad-hoc for easy_install DATA_FILES = [] if not 'bdist_egg' in sys.argv: # generate and install bzr.1 only with plain install, not the # easy_install one DATA_FILES = [('man/man1', ['bzr.1'])] DATA_FILES = DATA_FILES + I18N_FILES # std setup ARGS = {'scripts': ['bzr'], 'data_files': DATA_FILES, 'cmdclass': command_classes, 'ext_modules': ext_modules, } ARGS.update(META_INFO) ARGS.update(BZRLIB) ARGS.update(PKG_DATA) if __name__ == '__main__': setup(**ARGS) bzr-2.7.0/tools/0000755000000000000000000000000010313452437011605 5ustar 00000000000000bzr-2.7.0/apport/README0000644000000000000000000000061711357256273012647 0ustar 00000000000000Bazaar supports semi-automatic bug reporting through Apport . If apport is not installed, an exception is printed to stderr in the usual way. For this to work properly it's suggested that two files be installed when a package of bzr is installed: ``bzr.conf`` into ``/etc/apport/crashdb.conf.d`` ``source_bzr.py`` into ``/usr/share/apport/package-hooks`` bzr-2.7.0/apport/bzr-crashdb.conf0000644000000000000000000000025611357256273015036 0ustar 00000000000000bzr = { # most bzr bugs are upstream bugs; file them there 'impl': 'launchpad', 'project': 'bzr', 'bug_pattern_base': 'http://people.canonical.com/~pitti/bugpatterns', } bzr-2.7.0/apport/source_bzr.py0000644000000000000000000000264711357256273014523 0ustar 00000000000000'''apport package hook for Bazaar''' # Copyright (c) 2009, 2010 Canonical Ltd. # Author: Matt Zimmerman # and others from apport.hookutils import * import os bzr_log = os.path.expanduser('~/.bzr.log') dot_bzr = os.path.expanduser('~/.bazaar') def _add_log_tail(report): # may have already been added in-process if 'BzrLogTail' in report: return bzr_log_lines = open(bzr_log).readlines() bzr_log_lines.reverse() bzr_log_tail = [] blanks = 0 for line in bzr_log_lines: if line == '\n': blanks += 1 bzr_log_tail.append(line) if blanks >= 2: break bzr_log_tail.reverse() report['BzrLogTail'] = ''.join(bzr_log_tail) def add_info(report): _add_log_tail(report) if 'BzrPlugins' not in report: # may already be present in-process report['BzrPlugins'] = command_output(['bzr', 'plugins', '-v']) # by default assume bzr crashes are upstream bugs; this relies on # having a bzr entry under /etc/apport/crashdb.conf.d/ report['CrashDB'] = 'bzr' # these may contain some sensitive info (smtp_passwords) # TODO: strip that out and attach the rest #attach_file_if_exists(report, # os.path.join(dot_bzr, 'bazaar.conf', 'BzrConfig') #attach_file_if_exists(report, # os.path.join(dot_bzr, 'locations.conf', 'BzrLocations') # vim: expandtab shiftwidth=4 bzr-2.7.0/bzrlib/__init__.py0000644000000000000000000002132512653717535014061 0ustar 00000000000000# Copyright (C) 2005-2013, 2016 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """All of bzr. Developer documentation is available at http://doc.bazaar.canonical.com/bzr.dev/developers/ The project website is at http://bazaar.canonical.com/ Some particularly interesting things in bzrlib are: * bzrlib.initialize -- setup the library for use * bzrlib.plugin.load_plugins -- load all installed plugins * bzrlib.branch.Branch.open -- open a branch * bzrlib.workingtree.WorkingTree.open -- open a working tree We hope you enjoy this library. """ from __future__ import absolute_import import time # Keep track of when bzrlib was first imported, so that we can give rough # timestamps relative to program start in the log file kept by bzrlib.trace. _start_time = time.time() import codecs import sys IGNORE_FILENAME = ".bzrignore" __copyright__ = "Copyright 2005-2012 Canonical Ltd." # same format as sys.version_info: "A tuple containing the five components of # the version number: major, minor, micro, releaselevel, and serial. All # values except releaselevel are integers; the release level is 'alpha', # 'beta', 'candidate', or 'final'. The version_info value corresponding to the # Python version 2.0 is (2, 0, 0, 'final', 0)." Additionally we use a # releaselevel of 'dev' for unreleased under-development code. version_info = (2, 7, 0, 'final', 0) # API compatibility version api_minimum_version = (2, 4, 0) def _format_version_tuple(version_info): """Turn a version number 2, 3 or 5-tuple into a short string. This format matches and the typical presentation used in Python output. This also checks that the version is reasonable: the sub-release must be zero for final releases. >>> print _format_version_tuple((1, 0, 0, 'final', 0)) 1.0.0 >>> print _format_version_tuple((1, 2, 0, 'dev', 0)) 1.2.0dev >>> print _format_version_tuple((1, 2, 0, 'dev', 1)) 1.2.0dev1 >>> print _format_version_tuple((1, 1, 1, 'candidate', 2)) 1.1.1rc2 >>> print _format_version_tuple((2, 1, 0, 'beta', 1)) 2.1b1 >>> print _format_version_tuple((1, 4, 0)) 1.4.0 >>> print _format_version_tuple((1, 4)) 1.4 >>> print _format_version_tuple((2, 1, 0, 'final', 42)) 2.1.0.42 >>> print _format_version_tuple((1, 4, 0, 'wibble', 0)) 1.4.0.wibble.0 """ if len(version_info) == 2: main_version = '%d.%d' % version_info[:2] else: main_version = '%d.%d.%d' % version_info[:3] if len(version_info) <= 3: return main_version release_type = version_info[3] sub = version_info[4] if release_type == 'final' and sub == 0: sub_string = '' elif release_type == 'final': sub_string = '.' + str(sub) elif release_type == 'dev' and sub == 0: sub_string = 'dev' elif release_type == 'dev': sub_string = 'dev' + str(sub) elif release_type in ('alpha', 'beta'): if version_info[2] == 0: main_version = '%d.%d' % version_info[:2] sub_string = release_type[0] + str(sub) elif release_type == 'candidate': sub_string = 'rc' + str(sub) else: return '.'.join(map(str, version_info)) return main_version + sub_string # lazy_regex import must be done after _format_version_tuple definition # to avoid "no attribute '_format_version_tuple'" error when using # deprecated_function in the lazy_regex module. 
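# (Illustrative aside, not part of this module: install_lazy_compile()
# arranges for regex compilation to be deferred, so that roughly
#     pat = re.compile(r'foo\d+')   # cheap -- just records the arguments
#     pat.match('foo42')            # the real compile happens lazily here
# which is why the hook below is installed as early as possible, before
# modules that compile regexes at import time get loaded.)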
if getattr(sys, '_bzr_lazy_regex', False): # The 'bzr' executable sets _bzr_lazy_regex. We install the lazy regex # hack as soon as possible so that as much of the standard library can # benefit, including the 'string' module. del sys._bzr_lazy_regex import bzrlib.lazy_regex bzrlib.lazy_regex.install_lazy_compile() __version__ = _format_version_tuple(version_info) version_string = __version__ def _patch_filesystem_default_encoding(new_enc): """Change the Python process global encoding for filesystem names The effect is to change how open() and other builtin functions handle unicode filenames on posix systems. This should only be done near startup. The new encoding string passed to this function must survive until process termination, otherwise the interpreter may access uninitialized memory. The use of intern() may defer breakage is but is not enough, the string object should be secure against module reloading and during teardown. """ try: import ctypes old_ptr = ctypes.c_void_p.in_dll(ctypes.pythonapi, "Py_FileSystemDefaultEncoding") except (ImportError, ValueError): return # No ctypes or not CPython implementation, do nothing new_ptr = ctypes.cast(ctypes.c_char_p(intern(new_enc)), ctypes.c_void_p) old_ptr.value = new_ptr.value if sys.getfilesystemencoding() != new_enc: raise RuntimeError("Failed to change the filesystem default encoding") return new_enc # When running under the bzr script, override bad filesystem default encoding. # This is not safe to do for all users of bzrlib, other scripts should instead # just ensure a usable locale is set via the $LANG variable on posix systems. _fs_enc = sys.getfilesystemencoding() if getattr(sys, "_bzr_default_fs_enc", None) is not None: if (_fs_enc is None or codecs.lookup(_fs_enc).name == "ascii"): _fs_enc = _patch_filesystem_default_encoding(sys._bzr_default_fs_enc) if _fs_enc is None: _fs_enc = "ascii" else: _fs_enc = codecs.lookup(_fs_enc).name # bzr has various bits of global state that are slowly being eliminated. # This variable is intended to permit any new state-like things to be attached # to a library_state.BzrLibraryState object rather than getting new global # variables that need to be hunted down. Accessing the current BzrLibraryState # through this variable is not encouraged: it is better to pass it around as # part of the context of an operation than to look it up directly, but when # that is too hard, it is better to use this variable than to make a brand new # global variable. # If using this variable by looking it up (because it can't be easily obtained) # it is important to store the reference you get, rather than looking it up # repeatedly; that way your code will behave properly in the bzrlib test suite # and from programs that do use multiple library contexts. global_state = None def initialize(setup_ui=True, stdin=None, stdout=None, stderr=None): """Set up everything needed for normal use of bzrlib. Most applications that embed bzrlib, including bzr itself, should call this function to initialize various subsystems. More options may be added in future so callers should use named arguments. The object returned by this function can be used as a contex manager through the 'with' statement to automatically shut down when the process is finished with bzrlib. However (from bzr 2.4) it's not necessary to separately enter the context as well as starting bzr: bzrlib is ready to go when this function returns. 
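    A minimal usage sketch (illustrative, not part of the original
    docstring) for an application embedding bzrlib:

        import bzrlib
        with bzrlib.initialize():
            from bzrlib.branch import Branch
            branch = Branch.open('.')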
:param setup_ui: If true (default) use a terminal UI; otherwise some other ui_factory must be assigned to `bzrlib.ui.ui_factory` by the caller. :param stdin, stdout, stderr: If provided, use these for terminal IO; otherwise use the files in `sys`. :return: A context manager for the use of bzrlib. The __exit__ should be called by the caller before exiting their process or otherwise stopping use of bzrlib. Advanced callers can use BzrLibraryState directly. """ from bzrlib import library_state, trace if setup_ui: import bzrlib.ui stdin = stdin or sys.stdin stdout = stdout or sys.stdout stderr = stderr or sys.stderr ui_factory = bzrlib.ui.make_ui_for_terminal(stdin, stdout, stderr) else: ui_factory = None tracer = trace.DefaultConfig() state = library_state.BzrLibraryState(ui=ui_factory, trace=tracer) # Start automatically in case people don't realize this returns a context. state._start() return state def test_suite(): import tests return tests.test_suite() bzr-2.7.0/bzrlib/_annotator_py.py0000644000000000000000000003256211673635356015204 0ustar 00000000000000# Copyright (C) 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Functionality for doing annotations in the 'optimal' way""" from __future__ import absolute_import from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ from bzrlib import ( annotate, # Must be lazy to avoid circular importing graph as _mod_graph, patiencediff, ) """) from bzrlib import ( errors, osutils, ui, ) class Annotator(object): """Class that drives performing annotations.""" def __init__(self, vf): """Create a new Annotator from a VersionedFile.""" self._vf = vf self._parent_map = {} self._text_cache = {} # Map from key => number of nexts that will be built from this key self._num_needed_children = {} self._annotations_cache = {} self._heads_provider = None self._ann_tuple_cache = {} def _update_needed_children(self, key, parent_keys): for parent_key in parent_keys: if parent_key in self._num_needed_children: self._num_needed_children[parent_key] += 1 else: self._num_needed_children[parent_key] = 1 def _get_needed_keys(self, key): """Determine the texts we need to get from the backing vf. :return: (vf_keys_needed, ann_keys_needed) vf_keys_needed These are keys that we need to get from the vf ann_keys_needed Texts which we have in self._text_cache but we don't have annotations for. We need to yield these in the proper order so that we can get proper annotations. 
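        (For illustration, not from the original docstring: annotating a key
        ('rev-3',) whose ancestry has not been seen before might return
        (set([('rev-3',), ('rev-2',), ('rev-1',)]), set()); the second set
        only becomes non-empty for texts already held in self._text_cache,
        such as ones injected via add_special_text.)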
""" parent_map = self._parent_map # We need 1 extra copy of the node we will be looking at when we are # done self._num_needed_children[key] = 1 vf_keys_needed = set() ann_keys_needed = set() needed_keys = set([key]) while needed_keys: parent_lookup = [] next_parent_map = {} for key in needed_keys: if key in self._parent_map: # We don't need to lookup this key in the vf if key not in self._text_cache: # Extract this text from the vf vf_keys_needed.add(key) elif key not in self._annotations_cache: # We do need to annotate ann_keys_needed.add(key) next_parent_map[key] = self._parent_map[key] else: parent_lookup.append(key) vf_keys_needed.add(key) needed_keys = set() next_parent_map.update(self._vf.get_parent_map(parent_lookup)) for key, parent_keys in next_parent_map.iteritems(): if parent_keys is None: # No graph versionedfile parent_keys = () next_parent_map[key] = () self._update_needed_children(key, parent_keys) needed_keys.update([key for key in parent_keys if key not in parent_map]) parent_map.update(next_parent_map) # _heads_provider does some graph caching, so it is only valid while # self._parent_map hasn't changed self._heads_provider = None return vf_keys_needed, ann_keys_needed def _get_needed_texts(self, key, pb=None): """Get the texts we need to properly annotate key. :param key: A Key that is present in self._vf :return: Yield (this_key, text, num_lines) 'text' is an opaque object that just has to work with whatever matcher object we are using. Currently it is always 'lines' but future improvements may change this to a simple text string. """ keys, ann_keys = self._get_needed_keys(key) if pb is not None: pb.update('getting stream', 0, len(keys)) stream = self._vf.get_record_stream(keys, 'topological', True) for idx, record in enumerate(stream): if pb is not None: pb.update('extracting', 0, len(keys)) if record.storage_kind == 'absent': raise errors.RevisionNotPresent(record.key, self._vf) this_key = record.key lines = osutils.chunks_to_lines(record.get_bytes_as('chunked')) num_lines = len(lines) self._text_cache[this_key] = lines yield this_key, lines, num_lines for key in ann_keys: lines = self._text_cache[key] num_lines = len(lines) yield key, lines, num_lines def _get_parent_annotations_and_matches(self, key, text, parent_key): """Get the list of annotations for the parent, and the matching lines. 
:param text: The opaque value given by _get_needed_texts :param parent_key: The key for the parent text :return: (parent_annotations, matching_blocks) parent_annotations is a list as long as the number of lines in parent matching_blocks is a list of (parent_idx, text_idx, len) tuples indicating which lines match between the two texts """ parent_lines = self._text_cache[parent_key] parent_annotations = self._annotations_cache[parent_key] # PatienceSequenceMatcher should probably be part of Policy matcher = patiencediff.PatienceSequenceMatcher(None, parent_lines, text) matching_blocks = matcher.get_matching_blocks() return parent_annotations, matching_blocks def _update_from_first_parent(self, key, annotations, lines, parent_key): """Reannotate this text relative to its first parent.""" (parent_annotations, matching_blocks) = self._get_parent_annotations_and_matches( key, lines, parent_key) for parent_idx, lines_idx, match_len in matching_blocks: # For all matching regions we copy across the parent annotations annotations[lines_idx:lines_idx + match_len] = \ parent_annotations[parent_idx:parent_idx + match_len] def _update_from_other_parents(self, key, annotations, lines, this_annotation, parent_key): """Reannotate this text relative to a second (or more) parent.""" (parent_annotations, matching_blocks) = self._get_parent_annotations_and_matches( key, lines, parent_key) last_ann = None last_parent = None last_res = None # TODO: consider making all annotations unique and then using 'is' # everywhere. Current results claim that isn't any faster, # because of the time spent deduping # deduping also saves a bit of memory. For NEWS it saves ~1MB, # but that is out of 200-300MB for extracting everything, so a # fairly trivial amount for parent_idx, lines_idx, match_len in matching_blocks: # For lines which match this parent, we will now resolve whether # this parent wins over the current annotation ann_sub = annotations[lines_idx:lines_idx + match_len] par_sub = parent_annotations[parent_idx:parent_idx + match_len] if ann_sub == par_sub: continue for idx in xrange(match_len): ann = ann_sub[idx] par_ann = par_sub[idx] ann_idx = lines_idx + idx if ann == par_ann: # Nothing to change continue if ann == this_annotation: # Originally claimed 'this', but it was really in this # parent annotations[ann_idx] = par_ann continue # Resolve the fact that both sides have a different value for # last modified if ann == last_ann and par_ann == last_parent: annotations[ann_idx] = last_res else: new_ann = set(ann) new_ann.update(par_ann) new_ann = tuple(sorted(new_ann)) annotations[ann_idx] = new_ann last_ann = ann last_parent = par_ann last_res = new_ann def _record_annotation(self, key, parent_keys, annotations): self._annotations_cache[key] = annotations for parent_key in parent_keys: num = self._num_needed_children[parent_key] num -= 1 if num == 0: del self._text_cache[parent_key] del self._annotations_cache[parent_key] # Do we want to clean up _num_needed_children at this point as # well? 
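            # (Descriptive note, not in the original source:
            # _num_needed_children acts as a simple reference count -- the
            # del statements above evict a parent's cached text and
            # annotations once its last child has been annotated, keeping
            # memory bounded on large ancestry graphs.)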
self._num_needed_children[parent_key] = num def _annotate_one(self, key, text, num_lines): this_annotation = (key,) # Note: annotations will be mutated by calls to _update_from* annotations = [this_annotation] * num_lines parent_keys = self._parent_map[key] if parent_keys: self._update_from_first_parent(key, annotations, text, parent_keys[0]) for parent in parent_keys[1:]: self._update_from_other_parents(key, annotations, text, this_annotation, parent) self._record_annotation(key, parent_keys, annotations) def add_special_text(self, key, parent_keys, text): """Add a specific text to the graph. This is used to add a text which is not otherwise present in the versioned file. (eg. a WorkingTree injecting 'current:' into the graph to annotate the edited content.) :param key: The key to use to request this text be annotated :param parent_keys: The parents of this text :param text: A string containing the content of the text """ self._parent_map[key] = parent_keys self._text_cache[key] = osutils.split_lines(text) self._heads_provider = None def annotate(self, key): """Return annotated fulltext for the given key. :param key: A tuple defining the text to annotate :return: ([annotations], [lines]) annotations is a list of tuples of keys, one for each line in lines each key is a possible source for the given line. lines the text of "key" as a list of lines """ pb = ui.ui_factory.nested_progress_bar() try: for text_key, text, num_lines in self._get_needed_texts(key, pb=pb): self._annotate_one(text_key, text, num_lines) finally: pb.finished() try: annotations = self._annotations_cache[key] except KeyError: raise errors.RevisionNotPresent(key, self._vf) return annotations, self._text_cache[key] def _get_heads_provider(self): if self._heads_provider is None: self._heads_provider = _mod_graph.KnownGraph(self._parent_map) return self._heads_provider def _resolve_annotation_tie(self, the_heads, line, tiebreaker): if tiebreaker is None: head = sorted(the_heads)[0] else: # Backwards compatibility, break up the heads into pairs and # resolve the result next_head = iter(the_heads) head = next_head.next() for possible_head in next_head: annotated_lines = ((head, line), (possible_head, line)) head = tiebreaker(annotated_lines)[0] return head def annotate_flat(self, key): """Determine the single-best-revision to source for each line. This is meant as a compatibility thunk to how annotate() used to work. :return: [(ann_key, line)] A list of tuples with a single annotation key for each line. """ custom_tiebreaker = annotate._break_annotation_tie annotations, lines = self.annotate(key) out = [] heads = self._get_heads_provider().heads append = out.append for annotation, line in zip(annotations, lines): if len(annotation) == 1: head = annotation[0] else: the_heads = heads(annotation) if len(the_heads) == 1: for head in the_heads: break # get the item out of the set else: head = self._resolve_annotation_tie(the_heads, line, custom_tiebreaker) append((head, line)) return out bzr-2.7.0/bzrlib/_annotator_pyx.pyx0000644000000000000000000003007711337021464015545 0ustar 00000000000000# Copyright (C) 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Functionality for doing annotations in the 'optimal' way""" cdef extern from "python-compat.h": pass cdef extern from "Python.h": ctypedef int Py_ssize_t ctypedef struct PyObject: pass ctypedef struct PyListObject: PyObject **ob_item int PyList_CheckExact(object) PyObject *PyList_GET_ITEM(object, Py_ssize_t o) Py_ssize_t PyList_GET_SIZE(object) int PyList_Append(object, object) except -1 int PyList_SetItem(object, Py_ssize_t o, object) except -1 int PyList_Sort(object) except -1 int PyTuple_CheckExact(object) object PyTuple_New(Py_ssize_t len) void PyTuple_SET_ITEM(object, Py_ssize_t pos, object) void PyTuple_SET_ITEM_ptr "PyTuple_SET_ITEM" (object, Py_ssize_t, PyObject *) int PyTuple_Resize(PyObject **, Py_ssize_t newlen) PyObject *PyTuple_GET_ITEM(object, Py_ssize_t o) Py_ssize_t PyTuple_GET_SIZE(object) PyObject *PyDict_GetItem(object d, object k) int PyDict_SetItem(object d, object k, object v) except -1 void Py_INCREF(object) void Py_INCREF_ptr "Py_INCREF" (PyObject *) void Py_DECREF_ptr "Py_DECREF" (PyObject *) int Py_EQ int Py_LT int PyObject_RichCompareBool(object, object, int opid) except -1 int PyObject_RichCompareBool_ptr "PyObject_RichCompareBool" ( PyObject *, PyObject *, int opid) from bzrlib import _annotator_py cdef int _check_annotations_are_lists(annotations, parent_annotations) except -1: if not PyList_CheckExact(annotations): raise TypeError('annotations must be a list') if not PyList_CheckExact(parent_annotations): raise TypeError('parent_annotations must be a list') return 0 cdef int _check_match_ranges(parent_annotations, annotations, Py_ssize_t parent_idx, Py_ssize_t lines_idx, Py_ssize_t match_len) except -1: if parent_idx + match_len > PyList_GET_SIZE(parent_annotations): raise ValueError('Match length exceeds len of' ' parent_annotations %s > %s' % (parent_idx + match_len, PyList_GET_SIZE(parent_annotations))) if lines_idx + match_len > PyList_GET_SIZE(annotations): raise ValueError('Match length exceeds len of' ' annotations %s > %s' % (lines_idx + match_len, PyList_GET_SIZE(annotations))) return 0 cdef PyObject *_next_tuple_entry(object tpl, Py_ssize_t *pos): # cannot_raise """Return the next entry from this tuple. :param tpl: The tuple we are investigating, *must* be a PyTuple :param pos: The last item we found. Will be updated to the new position. This cannot raise an exception, as it does no error checking. 
""" pos[0] = pos[0] + 1 if pos[0] >= PyTuple_GET_SIZE(tpl): return NULL return PyTuple_GET_ITEM(tpl, pos[0]) cdef object _combine_annotations(ann_one, ann_two, cache): """Combine the annotations from both sides.""" cdef Py_ssize_t pos_one, pos_two, len_one, len_two cdef Py_ssize_t out_pos cdef PyObject *temp, *left, *right if (PyObject_RichCompareBool(ann_one, ann_two, Py_LT)): cache_key = (ann_one, ann_two) else: cache_key = (ann_two, ann_one) temp = PyDict_GetItem(cache, cache_key) if temp != NULL: return temp if not PyTuple_CheckExact(ann_one) or not PyTuple_CheckExact(ann_two): raise TypeError('annotations must be tuples') # We know that annotations are tuples, and that both sides are already # sorted, so we can just walk and update a new list. pos_one = -1 pos_two = -1 out_pos = 0 left = _next_tuple_entry(ann_one, &pos_one) right = _next_tuple_entry(ann_two, &pos_two) new_ann = PyTuple_New(PyTuple_GET_SIZE(ann_one) + PyTuple_GET_SIZE(ann_two)) while left != NULL and right != NULL: # left == right is done by PyObject_RichCompareBool_ptr, however it # avoids a function call for a very common case. Drops 'time bzr # annotate NEWS' from 7.25s to 7.16s, so it *is* a visible impact. if (left == right or PyObject_RichCompareBool_ptr(left, right, Py_EQ)): # Identical values, step both Py_INCREF_ptr(left) PyTuple_SET_ITEM_ptr(new_ann, out_pos, left) left = _next_tuple_entry(ann_one, &pos_one) right = _next_tuple_entry(ann_two, &pos_two) elif (PyObject_RichCompareBool_ptr(left, right, Py_LT)): # left < right or right == NULL Py_INCREF_ptr(left) PyTuple_SET_ITEM_ptr(new_ann, out_pos, left) left = _next_tuple_entry(ann_one, &pos_one) else: # right < left or left == NULL Py_INCREF_ptr(right) PyTuple_SET_ITEM_ptr(new_ann, out_pos, right) right = _next_tuple_entry(ann_two, &pos_two) out_pos = out_pos + 1 while left != NULL: Py_INCREF_ptr(left) PyTuple_SET_ITEM_ptr(new_ann, out_pos, left) left = _next_tuple_entry(ann_one, &pos_one) out_pos = out_pos + 1 while right != NULL: Py_INCREF_ptr(right) PyTuple_SET_ITEM_ptr(new_ann, out_pos, right) right = _next_tuple_entry(ann_two, &pos_two) out_pos = out_pos + 1 if out_pos != PyTuple_GET_SIZE(new_ann): # Timing _PyTuple_Resize was not significantly faster that slicing # PyTuple_Resize((new_ann), out_pos) new_ann = new_ann[0:out_pos] PyDict_SetItem(cache, cache_key, new_ann) return new_ann cdef int _apply_parent_annotations(annotations, parent_annotations, matching_blocks) except -1: """Apply the annotations from parent_annotations into annotations. matching_blocks defines the ranges that match. """ cdef Py_ssize_t parent_idx, lines_idx, match_len, idx cdef PyListObject *par_list, *ann_list cdef PyObject **par_temp, **ann_temp _check_annotations_are_lists(annotations, parent_annotations) par_list = parent_annotations ann_list = annotations # For NEWS and bzrlib/builtins.py, over 99% of the lines are simply copied # across from the parent entry. So this routine is heavily optimized for # that. 
Would be interesting if we could use memcpy() but we have to incref # and decref for parent_idx, lines_idx, match_len in matching_blocks: _check_match_ranges(parent_annotations, annotations, parent_idx, lines_idx, match_len) par_temp = par_list.ob_item + parent_idx ann_temp = ann_list.ob_item + lines_idx for idx from 0 <= idx < match_len: Py_INCREF_ptr(par_temp[idx]) Py_DECREF_ptr(ann_temp[idx]) ann_temp[idx] = par_temp[idx] return 0 cdef int _merge_annotations(this_annotation, annotations, parent_annotations, matching_blocks, ann_cache) except -1: cdef Py_ssize_t parent_idx, ann_idx, lines_idx, match_len, idx cdef Py_ssize_t pos cdef PyObject *ann_temp, *par_temp _check_annotations_are_lists(annotations, parent_annotations) last_ann = None last_parent = None last_res = None for parent_idx, lines_idx, match_len in matching_blocks: _check_match_ranges(parent_annotations, annotations, parent_idx, lines_idx, match_len) # For lines which match this parent, we will now resolve whether # this parent wins over the current annotation for idx from 0 <= idx < match_len: ann_idx = lines_idx + idx ann_temp = PyList_GET_ITEM(annotations, ann_idx) par_temp = PyList_GET_ITEM(parent_annotations, parent_idx + idx) if (ann_temp == par_temp): # This is parent, do nothing # Pointer comparison is fine here. Value comparison would # be ok, but it will be handled in the final if clause by # merging the two tuples into the same tuple # Avoiding the Py_INCREF and function call to # PyObject_RichCompareBool using pointer comparison drops # timing from 215ms => 125ms continue par_ann = par_temp ann = ann_temp if (ann is this_annotation): # Originally claimed 'this', but it was really in this # parent Py_INCREF(par_ann) PyList_SetItem(annotations, ann_idx, par_ann) continue # Resolve the fact that both sides have a different value for # last modified if (ann is last_ann and par_ann is last_parent): Py_INCREF(last_res) PyList_SetItem(annotations, ann_idx, last_res) else: new_ann = _combine_annotations(ann, par_ann, ann_cache) Py_INCREF(new_ann) PyList_SetItem(annotations, ann_idx, new_ann) last_ann = ann last_parent = par_ann last_res = new_ann return 0 class Annotator(_annotator_py.Annotator): """Class that drives performing annotations.""" def _update_from_first_parent(self, key, annotations, lines, parent_key): """Reannotate this text relative to its first parent.""" (parent_annotations, matching_blocks) = self._get_parent_annotations_and_matches( key, lines, parent_key) _apply_parent_annotations(annotations, parent_annotations, matching_blocks) def _update_from_other_parents(self, key, annotations, lines, this_annotation, parent_key): """Reannotate this text relative to a second (or more) parent.""" (parent_annotations, matching_blocks) = self._get_parent_annotations_and_matches( key, lines, parent_key) _merge_annotations(this_annotation, annotations, parent_annotations, matching_blocks, self._ann_tuple_cache) def annotate_flat(self, key): """Determine the single-best-revision to source for each line. This is meant as a compatibility thunk to how annotate() used to work. 
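        :return: [(ann_key, line)] A list of tuples with a single annotation
            key for each line (return value as documented for the pure-Python
            Annotator.annotate_flat, noted here for clarity).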
""" cdef Py_ssize_t pos, num_lines from bzrlib import annotate custom_tiebreaker = annotate._break_annotation_tie annotations, lines = self.annotate(key) num_lines = len(lines) out = [] heads = self._get_heads_provider().heads for pos from 0 <= pos < num_lines: annotation = annotations[pos] line = lines[pos] if len(annotation) == 1: head = annotation[0] else: the_heads = heads(annotation) if len(the_heads) == 1: for head in the_heads: break # get the item out of the set else: # We need to resolve the ambiguity, for now just pick the # sorted smallest head = self._resolve_annotation_tie(the_heads, line, custom_tiebreaker) PyList_Append(out, (head, line)) return out bzr-2.7.0/bzrlib/_bencode_pyx.h0000644000000000000000000000173111211775531014543 0ustar 00000000000000/* Copyright (C) 2009 Canonical Ltd * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* Simple header providing some macro definitions for _bencode_pyx.pyx */ #define D_UPDATE_TAIL(self, n) (((self)->size -= (n), (self)->tail += (n))) #define E_UPDATE_TAIL(self, n) (((self)->size += (n), (self)->tail += (n))) bzr-2.7.0/bzrlib/_bencode_pyx.pyx0000644000000000000000000003034511337021464015135 0ustar 00000000000000# Copyright (C) 2007, 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Pyrex implementation for bencode coder/decoder""" cdef extern from "stddef.h": ctypedef unsigned int size_t cdef extern from "Python.h": ctypedef int Py_ssize_t int PyInt_CheckExact(object o) int PyLong_CheckExact(object o) int PyString_CheckExact(object o) int PyTuple_CheckExact(object o) int PyList_CheckExact(object o) int PyDict_CheckExact(object o) int PyBool_Check(object o) object PyString_FromStringAndSize(char *v, Py_ssize_t len) char *PyString_AS_STRING(object o) except NULL Py_ssize_t PyString_GET_SIZE(object o) except -1 object PyInt_FromString(char *str, char **pend, int base) int Py_GetRecursionLimit() int Py_EnterRecursiveCall(char *) void Py_LeaveRecursiveCall() int PyList_Append(object, object) except -1 cdef extern from "stdlib.h": void free(void *memblock) void *malloc(size_t size) void *realloc(void *memblock, size_t size) long strtol(char *, char **, int) cdef extern from "string.h": void *memcpy(void *dest, void *src, size_t count) cdef extern from "python-compat.h": int snprintf(char* buffer, size_t nsize, char* fmt, ...) cdef class Decoder cdef class Encoder cdef extern from "_bencode_pyx.h": void D_UPDATE_TAIL(Decoder, int n) void E_UPDATE_TAIL(Encoder, int n) # To maintain compatibility with older versions of pyrex, we have to use the # relative import here, rather than 'bzrlib._static_tuple_c' from _static_tuple_c cimport StaticTuple, StaticTuple_CheckExact, \ import_static_tuple_c import_static_tuple_c() cdef class Decoder: """Bencode decoder""" cdef readonly char *tail cdef readonly int size cdef readonly int _yield_tuples cdef object text def __init__(self, s, yield_tuples=0): """Initialize decoder engine. @param s: Python string. 
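        @param yield_tuples: if true, decoded bencode lists are returned
            as Python tuples rather than lists.

        Illustrative example (not part of the original docstring):

            >>> Decoder('li1ei2ee').decode()
            [1, 2]
            >>> Decoder('li1ei2ee', yield_tuples=1).decode()
            (1, 2)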
""" if not PyString_CheckExact(s): raise TypeError("String required") self.text = s self.tail = PyString_AS_STRING(s) self.size = PyString_GET_SIZE(s) self._yield_tuples = int(yield_tuples) def decode(self): result = self._decode_object() if self.size != 0: raise ValueError('junk in stream') return result def decode_object(self): return self._decode_object() cdef object _decode_object(self): cdef char ch if 0 == self.size: raise ValueError('stream underflow') if Py_EnterRecursiveCall("_decode_object"): raise RuntimeError("too deeply nested") try: ch = self.tail[0] if c'0' <= ch <= c'9': return self._decode_string() elif ch == c'l': D_UPDATE_TAIL(self, 1) return self._decode_list() elif ch == c'i': D_UPDATE_TAIL(self, 1) return self._decode_int() elif ch == c'd': D_UPDATE_TAIL(self, 1) return self._decode_dict() else: raise ValueError('unknown object type identifier %r' % ch) finally: Py_LeaveRecursiveCall() cdef int _read_digits(self, char stop_char) except -1: cdef int i i = 0 while ((self.tail[i] >= c'0' and self.tail[i] <= c'9') or self.tail[i] == c'-') and i < self.size: i = i + 1 if self.tail[i] != stop_char: raise ValueError("Stop character %c not found: %c" % (stop_char, self.tail[i])) if (self.tail[0] == c'0' or (self.tail[0] == c'-' and self.tail[1] == c'0')): if i == 1: return i else: raise ValueError # leading zeroes are not allowed return i cdef object _decode_int(self): cdef int i i = self._read_digits(c'e') self.tail[i] = 0 try: ret = PyInt_FromString(self.tail, NULL, 10) finally: self.tail[i] = c'e' D_UPDATE_TAIL(self, i+1) return ret cdef object _decode_string(self): cdef int n cdef char *next_tail # strtol allows leading whitespace, negatives, and leading zeros # however, all callers have already checked that '0' <= tail[0] <= '9' # or they wouldn't have called _decode_string # strtol will stop at trailing whitespace, etc n = strtol(self.tail, &next_tail, 10) if next_tail == NULL or next_tail[0] != c':': raise ValueError('string len not terminated by ":"') # strtol allows leading zeros, so validate that we don't have that if (self.tail[0] == c'0' and (n != 0 or (next_tail - self.tail != 1))): raise ValueError('leading zeros are not allowed') D_UPDATE_TAIL(self, next_tail - self.tail + 1) if n == 0: return '' if n > self.size: raise ValueError('stream underflow') if n < 0: raise ValueError('string size below zero: %d' % n) result = PyString_FromStringAndSize(self.tail, n) D_UPDATE_TAIL(self, n) return result cdef object _decode_list(self): result = [] while self.size > 0: if self.tail[0] == c'e': D_UPDATE_TAIL(self, 1) if self._yield_tuples: return tuple(result) else: return result else: # As a quick shortcut, check to see if the next object is a # string, since we know that won't be creating recursion # if self.tail[0] >= c'0' and self.tail[0] <= c'9': PyList_Append(result, self._decode_object()) raise ValueError('malformed list') cdef object _decode_dict(self): cdef char ch result = {} lastkey = None while self.size > 0: ch = self.tail[0] if ch == c'e': D_UPDATE_TAIL(self, 1) return result else: # keys should be strings only if self.tail[0] < c'0' or self.tail[0] > c'9': raise ValueError('key was not a simple string.') key = self._decode_string() if lastkey >= key: raise ValueError('dict keys disordered') else: lastkey = key value = self._decode_object() result[key] = value raise ValueError('malformed dict') def bdecode(object s): """Decode string x to Python object""" return Decoder(s).decode() def bdecode_as_tuple(object s): """Decode string x to Python object, using 
tuples rather than lists.""" return Decoder(s, True).decode() class Bencached(object): __slots__ = ['bencoded'] def __init__(self, s): self.bencoded = s cdef enum: INITSIZE = 1024 # initial size for encoder buffer INT_BUF_SIZE = 32 cdef class Encoder: """Bencode encoder""" cdef readonly char *tail cdef readonly int size cdef readonly char *buffer cdef readonly int maxsize def __init__(self, int maxsize=INITSIZE): """Initialize encoder engine @param maxsize: initial size of internal char buffer """ cdef char *p self.maxsize = 0 self.size = 0 self.tail = NULL p = malloc(maxsize) if p == NULL: raise MemoryError('Not enough memory to allocate buffer ' 'for encoder') self.buffer = p self.maxsize = maxsize self.tail = p def __dealloc__(self): free(self.buffer) self.buffer = NULL self.maxsize = 0 def __str__(self): if self.buffer != NULL and self.size != 0: return PyString_FromStringAndSize(self.buffer, self.size) else: return '' cdef int _ensure_buffer(self, int required) except 0: """Ensure that tail of CharTail buffer has enough size. If buffer is not big enough then function try to realloc buffer. """ cdef char *new_buffer cdef int new_size if self.size + required < self.maxsize: return 1 new_size = self.maxsize while new_size < self.size + required: new_size = new_size * 2 new_buffer = realloc(self.buffer, new_size) if new_buffer == NULL: raise MemoryError('Cannot realloc buffer for encoder') self.buffer = new_buffer self.maxsize = new_size self.tail = &new_buffer[self.size] return 1 cdef int _encode_int(self, int x) except 0: """Encode int to bencode string iNNNe @param x: value to encode """ cdef int n self._ensure_buffer(INT_BUF_SIZE) n = snprintf(self.tail, INT_BUF_SIZE, "i%de", x) if n < 0: raise MemoryError('int %d too big to encode' % x) E_UPDATE_TAIL(self, n) return 1 cdef int _encode_long(self, x) except 0: return self._append_string(''.join(('i', str(x), 'e'))) cdef int _append_string(self, s) except 0: cdef Py_ssize_t n n = PyString_GET_SIZE(s) self._ensure_buffer(n) memcpy(self.tail, PyString_AS_STRING(s), n) E_UPDATE_TAIL(self, n) return 1 cdef int _encode_string(self, x) except 0: cdef int n cdef Py_ssize_t x_len x_len = PyString_GET_SIZE(x) self._ensure_buffer(x_len + INT_BUF_SIZE) n = snprintf(self.tail, INT_BUF_SIZE, '%d:', x_len) if n < 0: raise MemoryError('string %s too big to encode' % x) memcpy((self.tail+n), PyString_AS_STRING(x), x_len) E_UPDATE_TAIL(self, n + x_len) return 1 cdef int _encode_list(self, x) except 0: self._ensure_buffer(1) self.tail[0] = c'l' E_UPDATE_TAIL(self, 1) for i in x: self.process(i) self._ensure_buffer(1) self.tail[0] = c'e' E_UPDATE_TAIL(self, 1) return 1 cdef int _encode_dict(self, x) except 0: self._ensure_buffer(1) self.tail[0] = c'd' E_UPDATE_TAIL(self, 1) keys = x.keys() keys.sort() for k in keys: if not PyString_CheckExact(k): raise TypeError('key in dict should be string') self._encode_string(k) self.process(x[k]) self._ensure_buffer(1) self.tail[0] = c'e' E_UPDATE_TAIL(self, 1) return 1 def process(self, object x): if Py_EnterRecursiveCall("encode"): raise RuntimeError("too deeply nested") try: if PyString_CheckExact(x): self._encode_string(x) elif PyInt_CheckExact(x): self._encode_int(x) elif PyLong_CheckExact(x): self._encode_long(x) elif (PyList_CheckExact(x) or PyTuple_CheckExact(x) or StaticTuple_CheckExact(x)): self._encode_list(x) elif PyDict_CheckExact(x): self._encode_dict(x) elif PyBool_Check(x): self._encode_int(int(x)) elif isinstance(x, Bencached): self._append_string(x.bencoded) else: raise TypeError('unsupported type 
%r' % x) finally: Py_LeaveRecursiveCall() def bencode(x): """Encode Python object x to string""" encoder = Encoder() encoder.process(x) return str(encoder) bzr-2.7.0/bzrlib/_btree_serializer_py.py0000644000000000000000000000560211673635356016524 0ustar 00000000000000# Copyright (C) 2008, 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # """B+Tree index parsing.""" from __future__ import absolute_import from bzrlib import static_tuple def _parse_leaf_lines(bytes, key_length, ref_list_length): lines = bytes.split('\n') nodes = [] as_st = static_tuple.StaticTuple.from_sequence stuple = static_tuple.StaticTuple for line in lines[1:]: if line == '': return nodes elements = line.split('\0', key_length) # keys are tuples key = as_st(elements[:key_length]).intern() line = elements[-1] references, value = line.rsplit('\0', 1) if ref_list_length: ref_lists = [] for ref_string in references.split('\t'): ref_list = as_st([as_st(ref.split('\0')).intern() for ref in ref_string.split('\r') if ref]) ref_lists.append(ref_list) ref_lists = as_st(ref_lists) node_value = stuple(value, ref_lists) else: node_value = stuple(value, stuple()) # No need for StaticTuple here as it is put into a dict nodes.append((key, node_value)) return nodes def _flatten_node(node, reference_lists): """Convert a node into the serialized form. :param node: A tuple representing a node (key_tuple, value, references) :param reference_lists: Does this index have reference lists? :return: (string_key, flattened) string_key The serialized key for referencing this node flattened A string with the serialized form for the contents """ if reference_lists: # TODO: Consider turning this back into the 'unoptimized' nested loop # form. It is probably more obvious for most people, and this is # just a reference implementation. flattened_references = ['\r'.join(['\x00'.join(reference) for reference in ref_list]) for ref_list in node[3]] else: flattened_references = [] string_key = '\x00'.join(node[1]) line = ("%s\x00%s\x00%s\n" % (string_key, '\t'.join(flattened_references), node[2])) return string_key, line bzr-2.7.0/bzrlib/_btree_serializer_pyx.pyx0000644000000000000000000011377211562472200017074 0ustar 00000000000000# Copyright (C) 2008, 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # """Pyrex extensions to btree node parsing.""" #python2.4 support cdef extern from "python-compat.h": pass cdef extern from "stdlib.h": ctypedef unsigned size_t cdef extern from "Python.h": ctypedef int Py_ssize_t # Required for older pyrex versions ctypedef struct PyObject: pass int PyList_Append(object lst, object item) except -1 char *PyString_AsString(object p) except NULL object PyString_FromStringAndSize(char *, Py_ssize_t) PyObject *PyString_FromStringAndSize_ptr "PyString_FromStringAndSize" (char *, Py_ssize_t) object PyString_FromFormat(char *, ...) int PyString_CheckExact(object s) int PyString_CheckExact_ptr "PyString_CheckExact" (PyObject *) Py_ssize_t PyString_Size(object p) Py_ssize_t PyString_GET_SIZE_ptr "PyString_GET_SIZE" (PyObject *) char * PyString_AS_STRING_ptr "PyString_AS_STRING" (PyObject *) char * PyString_AS_STRING(object) Py_ssize_t PyString_GET_SIZE(object) int PyString_AsStringAndSize_ptr(PyObject *, char **buf, Py_ssize_t *len) void PyString_InternInPlace(PyObject **) int PyTuple_CheckExact(object t) object PyTuple_New(Py_ssize_t n_entries) void PyTuple_SET_ITEM(object, Py_ssize_t offset, object) # steals the ref Py_ssize_t PyTuple_GET_SIZE(object t) PyObject *PyTuple_GET_ITEM_ptr_object "PyTuple_GET_ITEM" (object tpl, int index) void Py_INCREF(object) void Py_DECREF_ptr "Py_DECREF" (PyObject *) void *PyMem_Malloc(size_t nbytes) void PyMem_Free(void *) void memset(void *, int, size_t) cdef extern from "string.h": void *memcpy(void *dest, void *src, size_t n) void *memchr(void *s, int c, size_t n) int memcmp(void *s1, void *s2, size_t n) # GNU extension # void *memrchr(void *s, int c, size_t n) int strncmp(char *s1, char *s2, size_t n) unsigned long strtoul(char *s1, char **out, int base) long long strtoll(char *s1, char **out, int base) # It seems we need to import the definitions so that the pyrex compiler has # local names to access them. from _static_tuple_c cimport StaticTuple, \ import_static_tuple_c, StaticTuple_New, \ StaticTuple_Intern, StaticTuple_SET_ITEM, StaticTuple_CheckExact, \ StaticTuple_GET_SIZE, StaticTuple_GET_ITEM # This tells the test infrastructure that StaticTuple is a class, so we don't # have to worry about exception checking. 
## extern cdef class StaticTuple import sys # TODO: Find some way to import this from _dirstate_helpers cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise # memrchr seems to be a GNU extension, so we have to implement it ourselves # It is not present in any win32 standard library cdef char *pos cdef char *start start = s pos = start + n - 1 while pos >= start: if pos[0] == c: return pos pos = pos - 1 return NULL # TODO: Import this from _dirstate_helpers when it is merged cdef object safe_string_from_size(char *s, Py_ssize_t size): if size < 0: raise AssertionError( 'tried to create a string with an invalid size: %d @0x%x' % (size, s)) return PyString_FromStringAndSize(s, size) cdef object safe_interned_string_from_size(char *s, Py_ssize_t size): cdef PyObject *py_str if size < 0: raise AssertionError( 'tried to create a string with an invalid size: %d @0x%x' % (size, s)) py_str = PyString_FromStringAndSize_ptr(s, size) PyString_InternInPlace(&py_str) result = py_str # Casting a PyObject* to an triggers an INCREF from Pyrex, so we # DECREF it to avoid geting immortal strings Py_DECREF_ptr(py_str) return result # This sets up the StaticTuple C_API functionality import_static_tuple_c() cdef class BTreeLeafParser: """Parse the leaf nodes of a BTree index. :ivar bytes: The PyString object containing the uncompressed text for the node. :ivar key_length: An integer describing how many pieces the keys have for this index. :ivar ref_list_length: An integer describing how many references this index contains. :ivar keys: A PyList of keys found in this node. :ivar _cur_str: A pointer to the start of the next line to parse :ivar _end_str: A pointer to the end of bytes :ivar _start: Pointer to the location within the current line while parsing. :ivar _header_found: True when we have parsed the header for this node """ cdef object bytes cdef int key_length cdef int ref_list_length cdef object keys cdef char * _cur_str cdef char * _end_str # The current start point for parsing cdef char * _start cdef int _header_found def __init__(self, bytes, key_length, ref_list_length): self.bytes = bytes self.key_length = key_length self.ref_list_length = ref_list_length self.keys = [] self._cur_str = NULL self._end_str = NULL self._header_found = 0 # keys are tuples cdef extract_key(self, char * last): """Extract a key. :param last: points at the byte after the last byte permitted for the key. 
""" cdef char *temp_ptr cdef int loop_counter cdef StaticTuple key key = StaticTuple_New(self.key_length) for loop_counter from 0 <= loop_counter < self.key_length: # grab a key segment temp_ptr = memchr(self._start, c'\0', last - self._start) if temp_ptr == NULL: if loop_counter + 1 == self.key_length: # capture to last temp_ptr = last else: # Invalid line failure_string = ("invalid key, wanted segment from " + repr(safe_string_from_size(self._start, last - self._start))) raise AssertionError(failure_string) # capture the key string if (self.key_length == 1 and (temp_ptr - self._start) == 45 and strncmp(self._start, 'sha1:', 5) == 0): key_element = safe_string_from_size(self._start, temp_ptr - self._start) else: key_element = safe_interned_string_from_size(self._start, temp_ptr - self._start) # advance our pointer self._start = temp_ptr + 1 Py_INCREF(key_element) StaticTuple_SET_ITEM(key, loop_counter, key_element) key = StaticTuple_Intern(key) return key cdef int process_line(self) except -1: """Process a line in the bytes.""" cdef char *last cdef char *temp_ptr cdef char *ref_ptr cdef char *next_start cdef int loop_counter cdef Py_ssize_t str_len self._start = self._cur_str # Find the next newline last = memchr(self._start, c'\n', self._end_str - self._start) if last == NULL: # Process until the end of the file last = self._end_str self._cur_str = self._end_str else: # And the next string is right after it self._cur_str = last + 1 # The last character is right before the '\n' if last == self._start: # parsed it all. return 0 if last < self._start: # Unexpected error condition - fail raise AssertionError("last < self._start") if 0 == self._header_found: # The first line in a leaf node is the header "type=leaf\n" if strncmp("type=leaf", self._start, last - self._start) == 0: self._header_found = 1 return 0 else: raise AssertionError('Node did not start with "type=leaf": %r' % (safe_string_from_size(self._start, last - self._start))) key = self.extract_key(last) # find the value area temp_ptr = _my_memrchr(self._start, c'\0', last - self._start) if temp_ptr == NULL: # Invalid line raise AssertionError("Failed to find the value area") else: # Because of how conversions were done, we ended up with *lots* of # values that are identical. These are all of the 0-length nodes # that are referred to by the TREE_ROOT (and likely some other # directory nodes.) For example, bzr has 25k references to # something like '12607215 328306 0 0', which ends up consuming 1MB # of memory, just for those strings. 
str_len = last - temp_ptr - 1 if (str_len > 4 and strncmp(" 0 0", last - 4, 4) == 0): # This drops peak mem for bzr.dev from 87.4MB => 86.2MB # For Launchpad 236MB => 232MB value = safe_interned_string_from_size(temp_ptr + 1, str_len) else: value = safe_string_from_size(temp_ptr + 1, str_len) # shrink the references end point last = temp_ptr if self.ref_list_length: ref_lists = StaticTuple_New(self.ref_list_length) loop_counter = 0 while loop_counter < self.ref_list_length: ref_list = [] # extract a reference list loop_counter = loop_counter + 1 if last < self._start: raise AssertionError("last < self._start") # find the next reference list end point: temp_ptr = memchr(self._start, c'\t', last - self._start) if temp_ptr == NULL: # Only valid for the last list if loop_counter != self.ref_list_length: # Invalid line raise AssertionError( "invalid key, loop_counter != self.ref_list_length") else: # scan to the end of the ref list area ref_ptr = last next_start = last else: # scan to the end of this ref list ref_ptr = temp_ptr next_start = temp_ptr + 1 # Now, there may be multiple keys in the ref list. while self._start < ref_ptr: # loop finding keys and extracting them temp_ptr = memchr(self._start, c'\r', ref_ptr - self._start) if temp_ptr == NULL: # key runs to the end temp_ptr = ref_ptr PyList_Append(ref_list, self.extract_key(temp_ptr)) ref_list = StaticTuple_Intern(StaticTuple(*ref_list)) Py_INCREF(ref_list) StaticTuple_SET_ITEM(ref_lists, loop_counter - 1, ref_list) # prepare for the next reference list self._start = next_start node_value = StaticTuple(value, ref_lists) else: if last != self._start: # unexpected reference data present raise AssertionError("unexpected reference data present") node_value = StaticTuple(value, StaticTuple()) PyList_Append(self.keys, StaticTuple(key, node_value)) return 0 def parse(self): cdef Py_ssize_t byte_count if not PyString_CheckExact(self.bytes): raise AssertionError('self.bytes is not a string.') byte_count = PyString_Size(self.bytes) self._cur_str = PyString_AsString(self.bytes) # This points to the last character in the string self._end_str = self._cur_str + byte_count while self._cur_str < self._end_str: self.process_line() return self.keys def _parse_leaf_lines(bytes, key_length, ref_list_length): parser = BTreeLeafParser(bytes, key_length, ref_list_length) return parser.parse() # TODO: We can go from 8 byte offset + 4 byte length to a simple lookup, # because the block_offset + length is likely to be repeated. However, # the big win there is to cache across pages, and not just one page # Though if we did cache in a page, we could certainly use a short int. # And this goes from 40 bytes to 30 bytes. # One slightly ugly option would be to cache block offsets in a global. # However, that leads to thread-safety issues, etc. 
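The leaf line grammar handled by process_line() above (and by _parse_leaf_lines/_flatten_node in the pure-Python serializer) is compact enough to restate as a small sketch. parse_leaf_line is a hypothetical helper written only for illustration; it expects a single line with its trailing newline already stripped.

# One leaf line is:  key bits  '\x00'  reference area  '\x00'  value
# Reference lists are separated by '\t', references within a list by '\r',
# and the bits of each reference key again by '\x00'.  The value is taken
# from the *last* NUL, which is why the C parser uses memrchr.
def parse_leaf_line(line, key_length, ref_list_length):  # hypothetical sketch
    elements = line.split('\x00', key_length)
    key = tuple(elements[:key_length])
    references, value = elements[-1].rsplit('\x00', 1)
    ref_lists = []
    if ref_list_length:
        for ref_string in references.split('\t'):
            ref_lists.append(tuple(tuple(ref.split('\x00'))
                                   for ref in ref_string.split('\r') if ref))
    return key, value, tuple(ref_lists)

# parse_leaf_line('k1\x00k2\x00r1a\x00r1b\rr2a\x00r2b\x00val', 2, 1)
#   -> (('k1', 'k2'), 'val', ((('r1a', 'r1b'), ('r2a', 'r2b')),))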
ctypedef struct gc_chk_sha1_record: long long block_offset unsigned int block_length unsigned int record_start unsigned int record_end char sha1[20] cdef int _unhexbuf[256] cdef char *_hexbuf _hexbuf = '0123456789abcdef' cdef _populate_unhexbuf(): cdef int i for i from 0 <= i < 256: _unhexbuf[i] = -1 for i from 0 <= i < 10: # 0123456789 => map to the raw number _unhexbuf[(i + c'0')] = i for i from 10 <= i < 16: # abcdef => 10, 11, 12, 13, 14, 15, 16 _unhexbuf[(i - 10 + c'a')] = i for i from 10 <= i < 16: # ABCDEF => 10, 11, 12, 13, 14, 15, 16 _unhexbuf[(i - 10 + c'A')] = i _populate_unhexbuf() cdef int _unhexlify_sha1(char *as_hex, char *as_bin): # cannot_raise """Take the hex sha1 in as_hex and make it binary in as_bin Same as binascii.unhexlify, but working on C strings, not Python objects. """ cdef int top cdef int bot cdef int i, j cdef char *cur # binascii does this using isupper() and tolower() and ?: syntax. I'm # guessing a simple lookup array should be faster. j = 0 for i from 0 <= i < 20: top = _unhexbuf[(as_hex[j])] j = j + 1 bot = _unhexbuf[(as_hex[j])] j = j + 1 if top == -1 or bot == -1: return 0 as_bin[i] = ((top << 4) + bot); return 1 def _py_unhexlify(as_hex): """For the test infrastructure, just thunks to _unhexlify_sha1""" if len(as_hex) != 40 or not PyString_CheckExact(as_hex): raise ValueError('not a 40-byte hex digest') as_bin = PyString_FromStringAndSize(NULL, 20) if _unhexlify_sha1(PyString_AS_STRING(as_hex), PyString_AS_STRING(as_bin)): return as_bin return None cdef void _hexlify_sha1(char *as_bin, char *as_hex): # cannot_raise cdef int i, j cdef char c j = 0 for i from 0 <= i < 20: c = as_bin[i] as_hex[j] = _hexbuf[(c>>4)&0xf] j = j + 1 as_hex[j] = _hexbuf[(c)&0xf] j = j + 1 def _py_hexlify(as_bin): """For test infrastructure, thunk to _hexlify_sha1""" if len(as_bin) != 20 or not PyString_CheckExact(as_bin): raise ValueError('not a 20-byte binary digest') as_hex = PyString_FromStringAndSize(NULL, 40) _hexlify_sha1(PyString_AS_STRING(as_bin), PyString_AS_STRING(as_hex)) return as_hex cdef int _key_to_sha1(key, char *sha1): # cannot_raise """Map a key into its sha1 content. :param key: A tuple of style ('sha1:abcd...',) :param sha1: A char buffer of 20 bytes :return: 1 if this could be converted, 0 otherwise """ cdef char *c_val cdef PyObject *p_val if StaticTuple_CheckExact(key) and StaticTuple_GET_SIZE(key) == 1: p_val = StaticTuple_GET_ITEM(key, 0) elif (PyTuple_CheckExact(key) and PyTuple_GET_SIZE(key) == 1): p_val = PyTuple_GET_ITEM_ptr_object(key, 0) else: # Not a tuple or a StaticTuple return 0 if (PyString_CheckExact_ptr(p_val) and PyString_GET_SIZE_ptr(p_val) == 45): c_val = PyString_AS_STRING_ptr(p_val) else: return 0 if strncmp(c_val, 'sha1:', 5) != 0: return 0 if not _unhexlify_sha1(c_val + 5, sha1): return 0 return 1 def _py_key_to_sha1(key): """Map a key to a simple sha1 string. This is a testing thunk to the C function. """ as_bin_sha = PyString_FromStringAndSize(NULL, 20) if _key_to_sha1(key, PyString_AS_STRING(as_bin_sha)): return as_bin_sha return None cdef StaticTuple _sha1_to_key(char *sha1): """Compute a ('sha1:abcd',) key for a given sha1.""" cdef StaticTuple key cdef object hexxed cdef char *c_buf hexxed = PyString_FromStringAndSize(NULL, 45) c_buf = PyString_AS_STRING(hexxed) memcpy(c_buf, 'sha1:', 5) _hexlify_sha1(sha1, c_buf+5) key = StaticTuple_New(1) Py_INCREF(hexxed) StaticTuple_SET_ITEM(key, 0, hexxed) # This is a bit expensive. To parse 120 keys takes 48us, to return them all # can be done in 66.6us (so 18.6us to build them all). 
# Adding simple hash() here brings it to 76.6us (so computing the hash # value of 120keys is 10us), Intern is 86.9us (another 10us to look and add # them to the intern structure.) # However, since we only intern keys that are in active use, it is probably # a win. Since they would have been read from elsewhere anyway. # We *could* hang the PyObject form off of the gc_chk_sha1_record for ones # that we have deserialized. Something to think about, at least. key = StaticTuple_Intern(key) return key def _py_sha1_to_key(sha1_bin): """Test thunk to check the sha1 mapping.""" if not PyString_CheckExact(sha1_bin) or PyString_GET_SIZE(sha1_bin) != 20: raise ValueError('sha1_bin must be a str of exactly 20 bytes') return _sha1_to_key(PyString_AS_STRING(sha1_bin)) cdef unsigned int _sha1_to_uint(char *sha1): # cannot_raise cdef unsigned int val # Must be in MSB, because that is how the content is sorted val = ((((sha1[0]) & 0xff) << 24) | (((sha1[1]) & 0xff) << 16) | (((sha1[2]) & 0xff) << 8) | (((sha1[3]) & 0xff) << 0)) return val cdef _format_record(gc_chk_sha1_record *record): # This is inefficient to go from a logical state back to a # string, but it makes things work a bit better internally for now. if record.block_offset >= 0xFFFFFFFF: # %llu is what we really want, but unfortunately it was only added # in python 2.7... :( block_offset_str = str(record.block_offset) value = PyString_FromFormat('%s %u %u %u', PyString_AS_STRING(block_offset_str), record.block_length, record.record_start, record.record_end) else: value = PyString_FromFormat('%lu %u %u %u', record.block_offset, record.block_length, record.record_start, record.record_end) return value cdef class GCCHKSHA1LeafNode: """Track all the entries for a given leaf node.""" cdef gc_chk_sha1_record *records cdef public object last_key cdef gc_chk_sha1_record *last_record cdef public int num_records # This is the number of bits to shift to get to the interesting byte. A # value of 24 means that the very first byte changes across all keys. # Anything else means that there is a common prefix of bits that we can # ignore. 0 means that at least the first 3 bytes are identical, though # that is going to be very rare cdef public unsigned char common_shift # This maps an interesting byte to the first record that matches. # Equivalent to bisect.bisect_left(self.records, sha1), though only taking # into account that one byte. cdef unsigned char offsets[257] def __sizeof__(self): # :( Why doesn't Pyrex let me do a simple sizeof(GCCHKSHA1LeafNode) # like Cython? Explicitly enumerating everything here seems to leave my # size off by 2 (286 bytes vs 288 bytes actual). I'm guessing it is an # alignment/padding issue. Oh well- at least we scale properly with # num_records and are very close to correct, which is what I care # about. 
# If we ever decide to require cython: # return (sizeof(GCCHKSHA1LeafNode) # + sizeof(gc_chk_sha1_record)*self.num_records) return (sizeof(PyObject) + sizeof(void*) + sizeof(int) + sizeof(gc_chk_sha1_record*) + sizeof(PyObject *) + sizeof(gc_chk_sha1_record*) + sizeof(char) + sizeof(unsigned char)*257 + sizeof(gc_chk_sha1_record)*self.num_records) def __dealloc__(self): if self.records != NULL: PyMem_Free(self.records) self.records = NULL def __init__(self, bytes): self._parse_bytes(bytes) self.last_key = None self.last_record = NULL property min_key: def __get__(self): if self.num_records > 0: return _sha1_to_key(self.records[0].sha1) return None property max_key: def __get__(self): if self.num_records > 0: return _sha1_to_key(self.records[self.num_records-1].sha1) return None cdef StaticTuple _record_to_value_and_refs(self, gc_chk_sha1_record *record): """Extract the refs and value part of this record.""" cdef StaticTuple value_and_refs cdef StaticTuple empty value_and_refs = StaticTuple_New(2) value = _format_record(record) Py_INCREF(value) StaticTuple_SET_ITEM(value_and_refs, 0, value) # Always empty refs empty = StaticTuple_New(0) Py_INCREF(empty) StaticTuple_SET_ITEM(value_and_refs, 1, empty) return value_and_refs cdef StaticTuple _record_to_item(self, gc_chk_sha1_record *record): """Turn a given record back into a fully fledged item. """ cdef StaticTuple item cdef StaticTuple key cdef StaticTuple value_and_refs cdef object value key = _sha1_to_key(record.sha1) item = StaticTuple_New(2) Py_INCREF(key) StaticTuple_SET_ITEM(item, 0, key) value_and_refs = self._record_to_value_and_refs(record) Py_INCREF(value_and_refs) StaticTuple_SET_ITEM(item, 1, value_and_refs) return item cdef gc_chk_sha1_record* _lookup_record(self, char *sha1) except? NULL: """Find a gc_chk_sha1_record that matches the sha1 supplied.""" cdef int lo, hi, mid, the_cmp cdef int offset # TODO: We can speed up misses by comparing this sha1 to the common # bits, and seeing if the common prefix matches, if not, we don't # need to search for anything because it cannot match # Use the offset array to find the closest fit for this entry # follow that up with bisecting, since multiple keys can be in one # spot # Bisecting dropped us from 7000 comparisons to 582 (4.8/key), using # the offset array dropped us from 23us to 20us and 156 comparisions # (1.3/key) offset = self._offset_for_sha1(sha1) lo = self.offsets[offset] hi = self.offsets[offset+1] if hi == 255: # if hi == 255 that means we potentially ran off the end of the # list, so push it up to num_records # note that if 'lo' == 255, that is ok, because we can start # searching from that part of the list. 
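In pure Python, the offset-table narrowing plus bisect that _lookup_record performs looks roughly like the sketch below. lookup and its arguments are hypothetical names; it assumes a sorted list of 20-byte binary sha1 strings plus a (shift, offsets) pair computed the way _compute_common() does further down, and it returns an index instead of a record pointer.

import struct

def lookup(sha1, sha1s, shift, offsets):  # illustrative sketch only
    # Narrow the search window with the 257-entry offset table, then
    # finish with an ordinary binary search over the sorted records.
    first_byte = (struct.unpack('>L', sha1[:4])[0] >> shift) & 0xFF
    lo, hi = offsets[first_byte], offsets[first_byte + 1]
    if hi == 255:          # table saturates at 255; search the whole tail
        hi = len(sha1s)
    while lo < hi:
        mid = (lo + hi) // 2
        if sha1s[mid] == sha1:
            return mid
        elif sha1s[mid] < sha1:   # byte-wise comparison, like memcmp
            lo = mid + 1
        else:
            hi = mid
    return None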
hi = self.num_records local_n_cmp = 0 while lo < hi: mid = (lo + hi) / 2 the_cmp = memcmp(self.records[mid].sha1, sha1, 20) if the_cmp == 0: return &self.records[mid] elif the_cmp < 0: lo = mid + 1 else: hi = mid return NULL def __contains__(self, key): cdef char sha1[20] cdef gc_chk_sha1_record *record if _key_to_sha1(key, sha1): # If it isn't a sha1 key, then it won't be in this leaf node record = self._lookup_record(sha1) if record != NULL: self.last_key = key self.last_record = record return True return False def __getitem__(self, key): cdef char sha1[20] cdef gc_chk_sha1_record *record record = NULL if self.last_record != NULL and key is self.last_key: record = self.last_record elif _key_to_sha1(key, sha1): record = self._lookup_record(sha1) if record == NULL: raise KeyError('key %r is not present' % (key,)) return self._record_to_value_and_refs(record) def __len__(self): return self.num_records def all_keys(self): cdef int i result = [] for i from 0 <= i < self.num_records: PyList_Append(result, _sha1_to_key(self.records[i].sha1)) return result def all_items(self): cdef int i result = [] for i from 0 <= i < self.num_records: item = self._record_to_item(&self.records[i]) PyList_Append(result, item) return result cdef int _count_records(self, char *c_content, char *c_end): # cannot_raise """Count how many records are in this section.""" cdef char *c_cur cdef int num_records c_cur = c_content num_records = 0 while c_cur != NULL and c_cur < c_end: c_cur = memchr(c_cur, c'\n', c_end - c_cur); if c_cur == NULL: break c_cur = c_cur + 1 num_records = num_records + 1 return num_records cdef _parse_bytes(self, bytes): """Parse the string 'bytes' into content.""" cdef char *c_bytes cdef char *c_cur cdef char *c_end cdef Py_ssize_t n_bytes cdef int num_records cdef int entry cdef gc_chk_sha1_record *cur_record if not PyString_CheckExact(bytes): raise TypeError('We only support parsing plain 8-bit strings.') # Pass 1, count how many records there will be n_bytes = PyString_GET_SIZE(bytes) c_bytes = PyString_AS_STRING(bytes) c_end = c_bytes + n_bytes if strncmp(c_bytes, 'type=leaf\n', 10): raise ValueError("bytes did not start with 'type=leaf\\n': %r" % (bytes[:10],)) c_cur = c_bytes + 10 num_records = self._count_records(c_cur, c_end) # Now allocate the memory for these items, and go to town self.records = PyMem_Malloc(num_records * (sizeof(unsigned short) + sizeof(gc_chk_sha1_record))) self.num_records = num_records cur_record = self.records entry = 0 while c_cur != NULL and c_cur < c_end and entry < num_records: c_cur = self._parse_one_entry(c_cur, c_end, cur_record) cur_record = cur_record + 1 entry = entry + 1 if (entry != self.num_records or c_cur != c_end or cur_record != self.records + self.num_records): raise ValueError('Something went wrong while parsing.') # Pass 3: build the offset map self._compute_common() cdef char *_parse_one_entry(self, char *c_cur, char *c_end, gc_chk_sha1_record *cur_record) except NULL: """Read a single sha record from the bytes. 
:param c_cur: The pointer to the start of bytes :param cur_record: """ cdef char *c_next if strncmp(c_cur, 'sha1:', 5): raise ValueError('line did not start with sha1: %r' % (safe_string_from_size(c_cur, 10),)) c_cur = c_cur + 5 c_next = memchr(c_cur, c'\0', c_end - c_cur) if c_next == NULL or (c_next - c_cur != 40): raise ValueError('Line did not contain 40 hex bytes') if not _unhexlify_sha1(c_cur, cur_record.sha1): raise ValueError('We failed to unhexlify') c_cur = c_next + 1 if c_cur[0] != c'\0': raise ValueError('only 1 null, not 2 as expected') c_cur = c_cur + 1 cur_record.block_offset = strtoll(c_cur, &c_next, 10) if c_cur == c_next or c_next[0] != c' ': raise ValueError('Failed to parse block offset') c_cur = c_next + 1 cur_record.block_length = strtoul(c_cur, &c_next, 10) if c_cur == c_next or c_next[0] != c' ': raise ValueError('Failed to parse block length') c_cur = c_next + 1 cur_record.record_start = strtoul(c_cur, &c_next, 10) if c_cur == c_next or c_next[0] != c' ': raise ValueError('Failed to parse block length') c_cur = c_next + 1 cur_record.record_end = strtoul(c_cur, &c_next, 10) if c_cur == c_next or c_next[0] != c'\n': raise ValueError('Failed to parse record end') c_cur = c_next + 1 return c_cur cdef int _offset_for_sha1(self, char *sha1) except -1: """Find the first interesting 8-bits of this sha1.""" cdef int this_offset cdef unsigned int as_uint as_uint = _sha1_to_uint(sha1) this_offset = (as_uint >> self.common_shift) & 0xFF return this_offset def _get_offset_for_sha1(self, sha1): return self._offset_for_sha1(PyString_AS_STRING(sha1)) cdef _compute_common(self): cdef unsigned int first cdef unsigned int this cdef unsigned int common_mask cdef unsigned char common_shift cdef int i cdef int offset, this_offset cdef int max_offset # The idea with the offset map is that we should be able to quickly # jump to the key that matches a gives sha1. We know that the keys are # in sorted order, and we know that a lot of the prefix is going to be # the same across them. # By XORing the records together, we can determine what bits are set in # all of them if self.num_records < 2: # Everything is in common if you have 0 or 1 leaves # So we'll always just shift to the first byte self.common_shift = 24 else: common_mask = 0xFFFFFFFF first = _sha1_to_uint(self.records[0].sha1) for i from 0 < i < self.num_records: this = _sha1_to_uint(self.records[i].sha1) common_mask = (~(first ^ this)) & common_mask common_shift = 24 while common_mask & 0x80000000 and common_shift > 0: common_mask = common_mask << 1 common_shift = common_shift - 1 self.common_shift = common_shift offset = 0 max_offset = self.num_records # We cap this loop at 254 records. All the other offsets just get # filled with 0xff as the singleton saying 'too many'. # It means that if we have >255 records we have to bisect the second # half of the list, but this is going to be very rare in practice. 
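Restated as a pure-Python sketch, the shift and offset-table computation done by _compute_common() amounts to the following; compute_offsets is a hypothetical name, and the input is assumed to be the sorted 20-byte binary sha1s of the records.

import struct

def compute_offsets(sha1s):  # illustrative sketch of _compute_common()
    as_uint = [struct.unpack('>L', s[:4])[0] for s in sorted(sha1s)]
    if len(as_uint) < 2:
        shift = 24                      # 0 or 1 records: use the first byte
    else:
        mask = 0xFFFFFFFF
        for this in as_uint[1:]:
            mask &= ~(as_uint[0] ^ this) & 0xFFFFFFFF
        shift = 24
        while (mask & 0x80000000) and shift > 0:
            mask = (mask << 1) & 0xFFFFFFFF
            shift -= 1
    offsets = [0] * 257
    offset = 0
    max_offset = min(len(as_uint), 255)  # cap, as described above
    for i in range(max_offset):
        this_offset = (as_uint[i] >> shift) & 0xFF
        while offset <= this_offset:
            offsets[offset] = i
            offset += 1
    while offset < 257:
        offsets[offset] = max_offset
        offset += 1
    return shift, offsets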
if max_offset > 255: max_offset = 255 for i from 0 <= i < max_offset: this_offset = self._offset_for_sha1(self.records[i].sha1) while offset <= this_offset: self.offsets[offset] = i offset = offset + 1 while offset < 257: self.offsets[offset] = max_offset offset = offset + 1 def _get_offsets(self): cdef int i result = [] for i from 0 <= i < 257: PyList_Append(result, self.offsets[i]) return result def _parse_into_chk(bytes, key_length, ref_list_length): """Parse into a format optimized for chk records.""" assert key_length == 1 assert ref_list_length == 0 return GCCHKSHA1LeafNode(bytes) def _flatten_node(node, reference_lists): """Convert a node into the serialized form. :param node: A tuple representing a node: (index, key_tuple, value, references) :param reference_lists: Does this index have reference lists? :return: (string_key, flattened) string_key The serialized key for referencing this node flattened A string with the serialized form for the contents """ cdef int have_reference_lists cdef Py_ssize_t flat_len cdef Py_ssize_t key_len cdef Py_ssize_t node_len cdef char * value cdef Py_ssize_t value_len cdef char * out cdef Py_ssize_t refs_len cdef Py_ssize_t next_len cdef int first_ref_list cdef int first_reference cdef int i cdef Py_ssize_t ref_bit_len if not PyTuple_CheckExact(node) and not StaticTuple_CheckExact(node): raise TypeError('We expected a tuple() or StaticTuple() for node not: %s' % type(node)) node_len = len(node) have_reference_lists = reference_lists if have_reference_lists: if node_len != 4: raise ValueError('With ref_lists, we expected 4 entries not: %s' % len(node)) elif node_len < 3: raise ValueError('Without ref_lists, we need at least 3 entries not: %s' % len(node)) # TODO: We can probably do better than string.join(), namely # when key has only 1 item, we can just grab that string # And when there are 2 items, we could do a single malloc + len() + 1 # also, doing .join() requires a PyObject_GetAttrString call, which # we could also avoid. # TODO: Note that pyrex 0.9.6 generates fairly crummy code here, using the # python object interface, versus 0.9.8+ which uses a helper that # checks if this supports the sequence interface. # We *could* do more work on our own, and grab the actual items # lists. For now, just ask people to use a better compiler. :) string_key = '\0'.join(node[1]) # TODO: instead of using string joins, precompute the final string length, # and then malloc a single string and copy everything in. # TODO: We probably want to use PySequenceFast, because we have lists and # tuples, but we aren't sure which we will get. 
# line := string_key NULL flat_refs NULL value LF # string_key := BYTES (NULL BYTES)* # flat_refs := ref_list (TAB ref_list)* # ref_list := ref (CR ref)* # ref := BYTES (NULL BYTES)* # value := BYTES refs_len = 0 if have_reference_lists: # Figure out how many bytes it will take to store the references ref_lists = node[3] next_len = len(ref_lists) # TODO: use a Py function if next_len > 0: # If there are no nodes, we don't need to do any work # Otherwise we will need (len - 1) '\t' characters to separate # the reference lists refs_len = refs_len + (next_len - 1) for ref_list in ref_lists: next_len = len(ref_list) if next_len > 0: # We will need (len - 1) '\r' characters to separate the # references refs_len = refs_len + (next_len - 1) for reference in ref_list: if (not PyTuple_CheckExact(reference) and not StaticTuple_CheckExact(reference)): raise TypeError( 'We expect references to be tuples not: %s' % type(reference)) next_len = len(reference) if next_len > 0: # We will need (len - 1) '\x00' characters to # separate the reference key refs_len = refs_len + (next_len - 1) for ref_bit in reference: if not PyString_CheckExact(ref_bit): raise TypeError('We expect reference bits' ' to be strings not: %s' % type(ref_bit)) refs_len = refs_len + PyString_GET_SIZE(ref_bit) # So we have the (key NULL refs NULL value LF) key_len = PyString_Size(string_key) val = node[2] if not PyString_CheckExact(val): raise TypeError('Expected a plain str for value not: %s' % type(val)) value = PyString_AS_STRING(val) value_len = PyString_GET_SIZE(val) flat_len = (key_len + 1 + refs_len + 1 + value_len + 1) line = PyString_FromStringAndSize(NULL, flat_len) # Get a pointer to the new buffer out = PyString_AsString(line) memcpy(out, PyString_AsString(string_key), key_len) out = out + key_len out[0] = c'\0' out = out + 1 if refs_len > 0: first_ref_list = 1 for ref_list in ref_lists: if first_ref_list == 0: out[0] = c'\t' out = out + 1 first_ref_list = 0 first_reference = 1 for reference in ref_list: if first_reference == 0: out[0] = c'\r' out = out + 1 first_reference = 0 next_len = len(reference) for i from 0 <= i < next_len: if i != 0: out[0] = c'\x00' out = out + 1 ref_bit = reference[i] ref_bit_len = PyString_GET_SIZE(ref_bit) memcpy(out, PyString_AS_STRING(ref_bit), ref_bit_len) out = out + ref_bit_len out[0] = c'\0' out = out + 1 memcpy(out, value, value_len) out = out + value_len out[0] = c'\n' return string_key, line bzr-2.7.0/bzrlib/_chk_map_py.py0000644000000000000000000001434711673635356014602 0ustar 00000000000000# Copyright (C) 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Python implementation of _search_key functions, etc.""" from __future__ import absolute_import import zlib import struct from bzrlib.static_tuple import StaticTuple _LeafNode = None _InternalNode = None _unknown = None def _crc32(bit): # Depending on python version and platform, zlib.crc32 will return either a # signed (<= 2.5 >= 3.0) or an unsigned (2.5, 2.6). # http://docs.python.org/library/zlib.html recommends using a mask to force # an unsigned value to ensure the same numeric value (unsigned) is obtained # across all python versions and platforms. # Note: However, on 32-bit platforms this causes an upcast to PyLong, which # are generally slower than PyInts. However, if performance becomes # critical, we should probably write the whole thing as an extension # anyway. # Though we really don't need that 32nd bit of accuracy. (even 2**24 # is probably enough node fan out for realistic trees.) return zlib.crc32(bit)&0xFFFFFFFF def _search_key_16(key): """Map the key tuple into a search key string which has 16-way fan out.""" return '\x00'.join(['%08X' % _crc32(bit) for bit in key]) def _search_key_255(key): """Map the key tuple into a search key string which has 255-way fan out. We use 255-way because '\n' is used as a delimiter, and causes problems while parsing. """ bytes = '\x00'.join([struct.pack('>L', _crc32(bit)) for bit in key]) return bytes.replace('\n', '_') def _deserialise_leaf_node(bytes, key, search_key_func=None): """Deserialise bytes, with key key, into a LeafNode. :param bytes: The bytes of the node. :param key: The key that the serialised node has. """ global _unknown, _LeafNode, _InternalNode if _LeafNode is None: from bzrlib import chk_map _unknown = chk_map._unknown _LeafNode = chk_map.LeafNode _InternalNode = chk_map.InternalNode result = _LeafNode(search_key_func=search_key_func) # Splitlines can split on '\r' so don't use it, split('\n') adds an # extra '' if the bytes ends in a final newline. 
lines = bytes.split('\n') trailing = lines.pop() if trailing != '': raise AssertionError('We did not have a final newline for %s' % (key,)) items = {} if lines[0] != 'chkleaf:': raise ValueError("not a serialised leaf node: %r" % bytes) maximum_size = int(lines[1]) width = int(lines[2]) length = int(lines[3]) prefix = lines[4] pos = 5 while pos < len(lines): line = prefix + lines[pos] elements = line.split('\x00') pos += 1 if len(elements) != width + 1: raise AssertionError( 'Incorrect number of elements (%d vs %d) for: %r' % (len(elements), width + 1, line)) num_value_lines = int(elements[-1]) value_lines = lines[pos:pos+num_value_lines] pos += num_value_lines value = '\n'.join(value_lines) items[StaticTuple.from_sequence(elements[:-1])] = value if len(items) != length: raise AssertionError("item count (%d) mismatch for key %s," " bytes %r" % (length, key, bytes)) result._items = items result._len = length result._maximum_size = maximum_size result._key = key result._key_width = width result._raw_size = (sum(map(len, lines[5:])) # the length of the suffix + (length)*(len(prefix)) + (len(lines)-5)) if not items: result._search_prefix = None result._common_serialised_prefix = None else: result._search_prefix = _unknown result._common_serialised_prefix = prefix if len(bytes) != result._current_size(): raise AssertionError('_current_size computed incorrectly') return result def _deserialise_internal_node(bytes, key, search_key_func=None): global _unknown, _LeafNode, _InternalNode if _InternalNode is None: from bzrlib import chk_map _unknown = chk_map._unknown _LeafNode = chk_map.LeafNode _InternalNode = chk_map.InternalNode result = _InternalNode(search_key_func=search_key_func) # Splitlines can split on '\r' so don't use it, remove the extra '' # from the result of split('\n') because we should have a trailing # newline lines = bytes.split('\n') if lines[-1] != '': raise ValueError("last line must be ''") lines.pop(-1) items = {} if lines[0] != 'chknode:': raise ValueError("not a serialised internal node: %r" % bytes) maximum_size = int(lines[1]) width = int(lines[2]) length = int(lines[3]) common_prefix = lines[4] for line in lines[5:]: line = common_prefix + line prefix, flat_key = line.rsplit('\x00', 1) items[prefix] = StaticTuple(flat_key,) if len(items) == 0: raise AssertionError("We didn't find any item for %s" % key) result._items = items result._len = length result._maximum_size = maximum_size result._key = key result._key_width = width # XXX: InternalNodes don't really care about their size, and this will # change if we add prefix compression result._raw_size = None # len(bytes) result._node_width = len(prefix) result._search_prefix = common_prefix return result def _bytes_to_text_key(bytes): """Take a CHKInventory value string and return a (file_id, rev_id) tuple""" sections = bytes.split('\n') kind, file_id = sections[0].split(': ') return (intern(file_id), intern(sections[3])) bzr-2.7.0/bzrlib/_chk_map_pyx.pyx0000644000000000000000000004106511453516446015151 0ustar 00000000000000# Copyright (C) 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
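For orientation, the serialised chkleaf layout that _deserialise_leaf_node() above walks through looks roughly like the hand-written illustration below. It is not taken from a real repository, and the final size check against LeafNode._current_size() is ignored here; the point is only the field order.

# chkleaf serialisation, as read by _deserialise_leaf_node() above:
#   header line, then maximum_size, key width, item count, common prefix,
#   then per item a prefix-completed key line whose '\x00'-separated bits
#   end with a count of value lines, followed by that many value lines.
example = (
    'chkleaf:\n'
    '10\n'        # maximum_size
    '1\n'         # width (key elements per item)
    '1\n'         # length (number of items)
    '\n'          # common serialised prefix (empty here)
    'foo\x001\n'  # key bit 'foo', value spans 1 line
    'bar\n'       # the value itself
)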
See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #python2.4 support cdef extern from "python-compat.h": pass cdef extern from *: ctypedef unsigned int size_t int memcmp(void *, void*, size_t) void memcpy(void *, void*, size_t) void *memchr(void *s, int c, size_t len) long strtol(char *, char **, int) void sprintf(char *, char *, ...) cdef extern from "Python.h": ctypedef int Py_ssize_t # Required for older pyrex versions ctypedef struct PyObject: pass int PyTuple_CheckExact(object p) Py_ssize_t PyTuple_GET_SIZE(object t) int PyString_CheckExact(object) char *PyString_AS_STRING(object s) PyObject *PyString_FromStringAndSize_ptr "PyString_FromStringAndSize" (char *, Py_ssize_t) Py_ssize_t PyString_GET_SIZE(object) void PyString_InternInPlace(PyObject **) long PyInt_AS_LONG(object) int PyDict_SetItem(object d, object k, object v) except -1 void Py_INCREF(object) void Py_DECREF_ptr "Py_DECREF" (PyObject *) object PyString_FromStringAndSize(char*, Py_ssize_t) # cimport all of the definitions we will need to access from _static_tuple_c cimport StaticTuple,\ import_static_tuple_c, StaticTuple_New, \ StaticTuple_Intern, StaticTuple_SET_ITEM, StaticTuple_CheckExact, \ StaticTuple_GET_SIZE cdef object crc32 from zlib import crc32 # Set up the StaticTuple C_API functionality import_static_tuple_c() cdef object _LeafNode _LeafNode = None cdef object _InternalNode _InternalNode = None cdef object _unknown _unknown = None # We shouldn't just copy this from _dirstate_helpers_pyx cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise # memrchr seems to be a GNU extension, so we have to implement it ourselves cdef char *pos cdef char *start start = s pos = start + n - 1 while pos >= start: if pos[0] == c: return pos pos = pos - 1 return NULL cdef object safe_interned_string_from_size(char *s, Py_ssize_t size): cdef PyObject *py_str if size < 0: raise AssertionError( 'tried to create a string with an invalid size: %d @0x%x' % (size, s)) py_str = PyString_FromStringAndSize_ptr(s, size) PyString_InternInPlace(&py_str) result = py_str # Casting a PyObject* to an triggers an INCREF from Pyrex, so we # DECREF it to avoid geting immortal strings Py_DECREF_ptr(py_str) return result def _search_key_16(key): """See chk_map._search_key_16.""" cdef Py_ssize_t num_bits cdef Py_ssize_t i, j cdef Py_ssize_t num_out_bytes cdef unsigned long crc_val cdef Py_ssize_t out_off cdef char *c_out num_bits = len(key) # 4 bytes per crc32, and another 1 byte between bits num_out_bytes = (9 * num_bits) - 1 out = PyString_FromStringAndSize(NULL, num_out_bytes) c_out = PyString_AS_STRING(out) for i from 0 <= i < num_bits: if i > 0: c_out[0] = c'\x00' c_out = c_out + 1 crc_val = PyInt_AS_LONG(crc32(key[i])) # Hex(val) order sprintf(c_out, '%08X', crc_val) c_out = c_out + 8 return out def _search_key_255(key): """See chk_map._search_key_255.""" cdef Py_ssize_t num_bits cdef Py_ssize_t i, j cdef Py_ssize_t num_out_bytes cdef unsigned long crc_val cdef Py_ssize_t out_off cdef char *c_out num_bits = len(key) # 4 bytes per crc32, and another 1 byte between bits num_out_bytes = (5 * num_bits) - 1 out = PyString_FromStringAndSize(NULL, num_out_bytes) c_out = PyString_AS_STRING(out) for i from 0 <= i < num_bits: if i > 0: c_out[0] = c'\x00' c_out = c_out + 1 crc_val = PyInt_AS_LONG(crc32(key[i])) # MSB order c_out[0] = 
(crc_val >> 24) & 0xFF c_out[1] = (crc_val >> 16) & 0xFF c_out[2] = (crc_val >> 8) & 0xFF c_out[3] = (crc_val >> 0) & 0xFF for j from 0 <= j < 4: if c_out[j] == c'\n': c_out[j] = c'_' c_out = c_out + 4 return out cdef int _get_int_from_line(char **cur, char *end, char *message) except -1: """Read a positive integer from the data stream. :param cur: The start of the data, this will be moved to after the trailing newline when done. :param end: Do not parse any data past this byte. :return: The integer stored in those bytes """ cdef int value cdef char *next_line, *next next_line = memchr(cur[0], c'\n', end - cur[0]) if next_line == NULL: raise ValueError("Missing %s line\n" % message) value = strtol(cur[0], &next, 10) if next != next_line: raise ValueError("%s line not a proper int\n" % message) cur[0] = next_line + 1 return value cdef _import_globals(): """Set the global attributes. Done lazy to avoid recursive import loops.""" global _LeafNode, _InternalNode, _unknown from bzrlib import chk_map _LeafNode = chk_map.LeafNode _InternalNode = chk_map.InternalNode _unknown = chk_map._unknown def _deserialise_leaf_node(bytes, key, search_key_func=None): """Deserialise bytes, with key key, into a LeafNode. :param bytes: The bytes of the node. :param key: The key that the serialised node has. """ cdef char *c_bytes, *cur, *next, *end cdef char *next_line cdef Py_ssize_t c_bytes_len, prefix_length, items_length cdef int maximum_size, width, length, i, prefix_tail_len cdef int num_value_lines, num_prefix_bits cdef char *prefix, *value_start, *prefix_tail cdef char *next_null, *last_null, *line_start cdef char *c_entry, *entry_start cdef StaticTuple entry_bits if _LeafNode is None: _import_globals() result = _LeafNode(search_key_func=search_key_func) # Splitlines can split on '\r' so don't use it, split('\n') adds an # extra '' if the bytes ends in a final newline. 
if not PyString_CheckExact(bytes): raise TypeError('bytes must be a plain string not %s' % (type(bytes),)) c_bytes = PyString_AS_STRING(bytes) c_bytes_len = PyString_GET_SIZE(bytes) if c_bytes_len < 9 or memcmp(c_bytes, "chkleaf:\n", 9) != 0: raise ValueError("not a serialised leaf node: %r" % bytes) if c_bytes[c_bytes_len - 1] != c'\n': raise ValueError("bytes does not end in a newline") end = c_bytes + c_bytes_len cur = c_bytes + 9 maximum_size = _get_int_from_line(&cur, end, "maximum_size") width = _get_int_from_line(&cur, end, "width") length = _get_int_from_line(&cur, end, "length") next_line = memchr(cur, c'\n', end - cur) if next_line == NULL: raise ValueError('Missing the prefix line\n') prefix = cur prefix_length = next_line - cur cur = next_line + 1 prefix_bits = [] prefix_tail = prefix num_prefix_bits = 0 next_null = memchr(prefix, c'\0', prefix_length) while next_null != NULL: num_prefix_bits = num_prefix_bits + 1 prefix_bits.append( PyString_FromStringAndSize(prefix_tail, next_null - prefix_tail)) prefix_tail = next_null + 1 next_null = memchr(prefix_tail, c'\0', next_line - prefix_tail) prefix_tail_len = next_line - prefix_tail if num_prefix_bits >= width: raise ValueError('Prefix has too many nulls versus width') items_length = end - cur items = {} while cur < end: line_start = cur next_line = memchr(cur, c'\n', end - cur) if next_line == NULL: raise ValueError('null line\n') last_null = _my_memrchr(cur, c'\0', next_line - cur) if last_null == NULL: raise ValueError('fail to find the num value lines null') next_null = last_null + 1 # move past NULL num_value_lines = _get_int_from_line(&next_null, next_line + 1, "num value lines") cur = next_line + 1 value_start = cur # Walk num_value_lines forward for i from 0 <= i < num_value_lines: next_line = memchr(cur, c'\n', end - cur) if next_line == NULL: raise ValueError('missing trailing newline') cur = next_line + 1 entry_bits = StaticTuple_New(width) for i from 0 <= i < num_prefix_bits: # TODO: Use PyList_GetItem, or turn prefix_bits into a # tuple/StaticTuple entry = prefix_bits[i] # SET_ITEM 'steals' a reference Py_INCREF(entry) StaticTuple_SET_ITEM(entry_bits, i, entry) value = PyString_FromStringAndSize(value_start, next_line - value_start) # The next entry bit needs the 'tail' from the prefix, and first part # of the line entry_start = line_start next_null = memchr(entry_start, c'\0', last_null - entry_start + 1) if next_null == NULL: raise ValueError('bad no null, bad') entry = PyString_FromStringAndSize(NULL, prefix_tail_len + next_null - line_start) c_entry = PyString_AS_STRING(entry) if prefix_tail_len > 0: memcpy(c_entry, prefix_tail, prefix_tail_len) if next_null - line_start > 0: memcpy(c_entry + prefix_tail_len, line_start, next_null - line_start) Py_INCREF(entry) i = num_prefix_bits StaticTuple_SET_ITEM(entry_bits, i, entry) while next_null != last_null: # We have remaining bits i = i + 1 if i > width: raise ValueError("Too many bits for entry") entry_start = next_null + 1 next_null = memchr(entry_start, c'\0', last_null - entry_start + 1) if next_null == NULL: raise ValueError('bad no null') entry = PyString_FromStringAndSize(entry_start, next_null - entry_start) Py_INCREF(entry) StaticTuple_SET_ITEM(entry_bits, i, entry) if StaticTuple_GET_SIZE(entry_bits) != width: raise AssertionError( 'Incorrect number of elements (%d vs %d)' % (len(entry_bits)+1, width + 1)) entry_bits = StaticTuple_Intern(entry_bits) PyDict_SetItem(items, entry_bits, value) if len(items) != length: raise ValueError("item count (%d) mismatch 
for key %s," " bytes %r" % (length, entry_bits, bytes)) result._items = items result._len = length result._maximum_size = maximum_size result._key = key result._key_width = width result._raw_size = items_length + length * prefix_length if length == 0: result._search_prefix = None result._common_serialised_prefix = None else: result._search_prefix = _unknown result._common_serialised_prefix = PyString_FromStringAndSize(prefix, prefix_length) if c_bytes_len != result._current_size(): raise AssertionError('_current_size computed incorrectly %d != %d', c_bytes_len, result._current_size()) return result def _deserialise_internal_node(bytes, key, search_key_func=None): cdef char *c_bytes, *cur, *next, *end cdef char *next_line cdef Py_ssize_t c_bytes_len, prefix_length cdef int maximum_size, width, length, i, prefix_tail_len cdef char *prefix, *line_prefix, *next_null, *c_item_prefix if _InternalNode is None: _import_globals() result = _InternalNode(search_key_func=search_key_func) if not StaticTuple_CheckExact(key): raise TypeError('key %r is not a StaticTuple' % (key,)) if not PyString_CheckExact(bytes): raise TypeError('bytes must be a plain string not %s' % (type(bytes),)) c_bytes = PyString_AS_STRING(bytes) c_bytes_len = PyString_GET_SIZE(bytes) if c_bytes_len < 9 or memcmp(c_bytes, "chknode:\n", 9) != 0: raise ValueError("not a serialised internal node: %r" % bytes) if c_bytes[c_bytes_len - 1] != c'\n': raise ValueError("bytes does not end in a newline") items = {} cur = c_bytes + 9 end = c_bytes + c_bytes_len maximum_size = _get_int_from_line(&cur, end, "maximum_size") width = _get_int_from_line(&cur, end, "width") length = _get_int_from_line(&cur, end, "length") next_line = memchr(cur, c'\n', end - cur) if next_line == NULL: raise ValueError('Missing the prefix line\n') prefix = cur prefix_length = next_line - cur cur = next_line + 1 while cur < end: # Find the null separator next_line = memchr(cur, c'\n', end - cur) if next_line == NULL: raise ValueError('missing trailing newline') next_null = _my_memrchr(cur, c'\0', next_line - cur) if next_null == NULL: raise ValueError('bad no null') item_prefix = PyString_FromStringAndSize(NULL, prefix_length + next_null - cur) c_item_prefix = PyString_AS_STRING(item_prefix) if prefix_length: memcpy(c_item_prefix, prefix, prefix_length) memcpy(c_item_prefix + prefix_length, cur, next_null - cur) flat_key = PyString_FromStringAndSize(next_null + 1, next_line - next_null - 1) flat_key = StaticTuple(flat_key).intern() PyDict_SetItem(items, item_prefix, flat_key) cur = next_line + 1 assert len(items) > 0 result._items = items result._len = length result._maximum_size = maximum_size result._key = key result._key_width = width # XXX: InternalNodes don't really care about their size, and this will # change if we add prefix compression result._raw_size = None # len(bytes) result._node_width = len(item_prefix) result._search_prefix = PyString_FromStringAndSize(prefix, prefix_length) return result def _bytes_to_text_key(bytes): """Take a CHKInventory value string and return a (file_id, rev_id) tuple""" cdef StaticTuple key cdef char *byte_str, *cur_end, *file_id_str, *byte_end cdef char *revision_str cdef Py_ssize_t byte_size, pos, file_id_len if not PyString_CheckExact(bytes): raise TypeError('bytes must be a string, got %r' % (type(bytes),)) byte_str = PyString_AS_STRING(bytes) byte_size = PyString_GET_SIZE(bytes) byte_end = byte_str + byte_size cur_end = memchr(byte_str, c':', byte_size) if cur_end == NULL: raise ValueError('No kind section found.') if 
cur_end[1] != c' ': raise ValueError( 'Kind section should end with ": ", got %r' % str(cur_end[:2],)) file_id_str = cur_end + 2 # file_id is now the data up until the next newline cur_end = memchr(file_id_str, c'\n', byte_end - file_id_str) if cur_end == NULL: raise ValueError('no newline after file-id') file_id = safe_interned_string_from_size(file_id_str, cur_end - file_id_str) # this is the end of the parent_str cur_end = memchr(cur_end + 1, c'\n', byte_end - cur_end - 1) if cur_end == NULL: raise ValueError('no newline after parent_str') # end of the name str cur_end = memchr(cur_end + 1, c'\n', byte_end - cur_end - 1) if cur_end == NULL: raise ValueError('no newline after name str') # the next section is the revision info revision_str = cur_end + 1 cur_end = memchr(cur_end + 1, c'\n', byte_end - cur_end - 1) if cur_end == NULL: # This is probably a dir: entry, which has revision as the last item cur_end = byte_end revision = safe_interned_string_from_size(revision_str, cur_end - revision_str) key = StaticTuple_New(2) Py_INCREF(file_id) StaticTuple_SET_ITEM(key, 0, file_id) Py_INCREF(revision) StaticTuple_SET_ITEM(key, 1, revision) return StaticTuple_Intern(key) bzr-2.7.0/bzrlib/_chunks_to_lines_py.py0000644000000000000000000000452311673635356016362 0ustar 00000000000000# Copyright (C) 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """The python implementation of chunks_to_lines""" from __future__ import absolute_import def chunks_to_lines(chunks): """Re-split chunks into simple lines. Each entry in the result should contain a single newline at the end. Except for the last entry which may not have a final newline. If chunks is already a simple list of lines, we return it directly. :param chunks: An list/tuple of strings. If chunks is already a list of lines, then we will return it as-is. :return: A list of strings. """ # Optimize for a very common case when chunks are already lines last_no_newline = False for chunk in chunks: if last_no_newline: # Only the last chunk is allowed to not have a trailing newline # Getting here means the last chunk didn't have a newline, and we # have a chunk following it break if not chunk: # Empty strings are never valid lines break elif '\n' in chunk[:-1]: # This chunk has an extra '\n', so we will have to split it break elif chunk[-1] != '\n': # This chunk does not have a trailing newline last_no_newline = True else: # All of the lines (but possibly the last) have a single newline at the # end of the string. # For the last one, we allow it to not have a trailing newline, but it # is not allowed to be an empty string. return chunks # These aren't simple lines, just join and split again. 
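A few concrete cases follow directly from the rules above: already-split input is returned untouched, anything else is joined and re-split on newlines.

# Already simple lines (only the last chunk may lack a newline):
#   chunks_to_lines(['foo\n', 'bar'])        -> ['foo\n', 'bar']  (as-is)
# Mixed chunks are joined and re-split:
#   chunks_to_lines(['foo\nbar', '\nbaz\n']) -> ['foo\n', 'bar\n', 'baz\n']
# An empty string is never a valid line, so it forces the re-split path:
#   chunks_to_lines(['', 'foo\n'])           -> ['foo\n']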
from bzrlib import osutils return osutils._split_lines(''.join(chunks)) bzr-2.7.0/bzrlib/_chunks_to_lines_pyx.pyx0000644000000000000000000001131511161721737016726 0ustar 00000000000000# Copyright (C) 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # """Pyrex extensions for converting chunks to lines.""" #python2.4 support cdef extern from "python-compat.h": pass cdef extern from "stdlib.h": ctypedef unsigned size_t cdef extern from "Python.h": ctypedef int Py_ssize_t # Required for older pyrex versions ctypedef struct PyObject: pass int PyList_Append(object lst, object item) except -1 int PyString_CheckExact(object p) char *PyString_AS_STRING(object p) Py_ssize_t PyString_GET_SIZE(object p) object PyString_FromStringAndSize(char *c_str, Py_ssize_t len) cdef extern from "string.h": void *memchr(void *s, int c, size_t n) def chunks_to_lines(chunks): """Re-split chunks into simple lines. Each entry in the result should contain a single newline at the end. Except for the last entry which may not have a final newline. If chunks is already a simple list of lines, we return it directly. :param chunks: An list/tuple of strings. If chunks is already a list of lines, then we will return it as-is. :return: A list of strings. """ cdef char *c_str cdef char *newline cdef char *c_last cdef Py_ssize_t the_len cdef int last_no_newline # Check to see if the chunks are already lines last_no_newline = 0 for chunk in chunks: if last_no_newline: # We have a chunk which followed a chunk without a newline, so this # is not a simple list of lines. break # Switching from PyString_AsStringAndSize to PyString_CheckExact and # then the macros GET_SIZE and AS_STRING saved us 40us / 470us. # It seems PyString_AsStringAndSize can actually trigger a conversion, # which we don't want anyway. if not PyString_CheckExact(chunk): raise TypeError('chunk is not a string') the_len = PyString_GET_SIZE(chunk) if the_len == 0: # An empty string is never a valid line break c_str = PyString_AS_STRING(chunk) c_last = c_str + the_len - 1 newline = memchr(c_str, c'\n', the_len) if newline != c_last: if newline == NULL: # Missing a newline. 
Only valid as the last line last_no_newline = 1 else: # There is a newline in the middle, we must resplit break else: # Everything was already a list of lines return chunks # We know we need to create a new list of lines lines = [] tail = None # Any remainder from the previous chunk for chunk in chunks: if tail is not None: chunk = tail + chunk tail = None if not PyString_CheckExact(chunk): raise TypeError('chunk is not a string') the_len = PyString_GET_SIZE(chunk) if the_len == 0: # An empty string is never a valid line, and we don't need to # append anything continue c_str = PyString_AS_STRING(chunk) c_last = c_str + the_len - 1 newline = memchr(c_str, c'\n', the_len) if newline == c_last: # A simple line PyList_Append(lines, chunk) elif newline == NULL: # A chunk without a newline, if this is the last entry, then we # allow it tail = chunk else: # We have a newline in the middle, loop until we've consumed all # lines while newline != NULL: line = PyString_FromStringAndSize(c_str, newline - c_str + 1) PyList_Append(lines, line) c_str = newline + 1 if c_str > c_last: # We are done break the_len = c_last - c_str + 1 newline = memchr(c_str, c'\n', the_len) if newline == NULL: tail = PyString_FromStringAndSize(c_str, the_len) break if tail is not None: PyList_Append(lines, tail) return lines bzr-2.7.0/bzrlib/_dirstate_helpers_py.py0000644000000000000000000002613211673635356016534 0ustar 00000000000000# Copyright (C) 2007, 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Python implementations of Dirstate Helper functions.""" from __future__ import absolute_import import binascii import os import struct # We cannot import the dirstate module, because it loads this module # All we really need is the IN_MEMORY_MODIFIED constant from bzrlib import errors from bzrlib.dirstate import DirState def pack_stat(st, _b64=binascii.b2a_base64, _pack=struct.Struct('>6L').pack): """Convert stat values into a packed representation Not all of the fields from the stat included are strictly needed, and by just encoding the mtime and mode a slight speed increase could be gained. However, using the pyrex version instead is a bigger win. """ # base64 encoding always adds a final newline, so strip it off return _b64(_pack(st.st_size & 0xFFFFFFFF, int(st.st_mtime) & 0xFFFFFFFF, int(st.st_ctime) & 0xFFFFFFFF, st.st_dev & 0xFFFFFFFF, st.st_ino & 0xFFFFFFFF, st.st_mode))[:-1] def _unpack_stat(packed_stat): """Turn a packed_stat back into the stat fields. This is meant as a debugging tool, should not be used in real code. """ (st_size, st_mtime, st_ctime, st_dev, st_ino, st_mode) = struct.unpack('>6L', binascii.a2b_base64(packed_stat)) return dict(st_size=st_size, st_mtime=st_mtime, st_ctime=st_ctime, st_dev=st_dev, st_ino=st_ino, st_mode=st_mode) def _bisect_path_left(paths, path): """Return the index where to insert path into paths. 
This uses the dirblock sorting. So all children in a directory come before the children of children. For example:: a/ b/ c d/ e b-c d-e a-a a=c Will be sorted as:: a a-a a=c a/b a/b-c a/d a/d-e a/b/c a/d/e :param paths: A list of paths to search through :param path: A single path to insert :return: An offset where 'path' can be inserted. :seealso: bisect.bisect_left """ hi = len(paths) lo = 0 while lo < hi: mid = (lo + hi) // 2 # Grab the dirname for the current dirblock cur = paths[mid] if _cmp_path_by_dirblock(cur, path) < 0: lo = mid + 1 else: hi = mid return lo def _bisect_path_right(paths, path): """Return the index where to insert path into paths. This uses a path-wise comparison so we get:: a a-b a=b a/b Rather than:: a a-b a/b a=b :param paths: A list of paths to search through :param path: A single path to insert :return: An offset where 'path' can be inserted. :seealso: bisect.bisect_right """ hi = len(paths) lo = 0 while lo < hi: mid = (lo+hi)//2 # Grab the dirname for the current dirblock cur = paths[mid] if _cmp_path_by_dirblock(path, cur) < 0: hi = mid else: lo = mid + 1 return lo def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache={}): """Return the index where to insert dirname into the dirblocks. The return value idx is such that all directories blocks in dirblock[:idx] have names < dirname, and all blocks in dirblock[idx:] have names >= dirname. Optional args lo (default 0) and hi (default len(dirblocks)) bound the slice of a to be searched. """ if hi is None: hi = len(dirblocks) try: dirname_split = cache[dirname] except KeyError: dirname_split = dirname.split('/') cache[dirname] = dirname_split while lo < hi: mid = (lo + hi) // 2 # Grab the dirname for the current dirblock cur = dirblocks[mid][0] try: cur_split = cache[cur] except KeyError: cur_split = cur.split('/') cache[cur] = cur_split if cur_split < dirname_split: lo = mid + 1 else: hi = mid return lo def cmp_by_dirs(path1, path2): """Compare two paths directory by directory. This is equivalent to doing:: cmp(path1.split('/'), path2.split('/')) The idea is that you should compare path components separately. This differs from plain ``cmp(path1, path2)`` for paths like ``'a-b'`` and ``a/b``. "a-b" comes after "a" but would come before "a/b" lexically. :param path1: first path :param path2: second path :return: negative number if ``path1`` comes first, 0 if paths are equal, and positive number if ``path2`` sorts first """ if not isinstance(path1, str): raise TypeError("'path1' must be a plain string, not %s: %r" % (type(path1), path1)) if not isinstance(path2, str): raise TypeError("'path2' must be a plain string, not %s: %r" % (type(path2), path2)) return cmp(path1.split('/'), path2.split('/')) def _cmp_path_by_dirblock(path1, path2): """Compare two paths based on what directory they are in. This generates a sort order, such that all children of a directory are sorted together, and grandchildren are in the same order as the children appear. But all grandchildren come after all children. 
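    For example, the following paths are already in this order::

        a
        a-a
        a=c
        a/b
        a/b-c
        a/d
        a/b/c

    Everything directly in the root sorts first, then the entries inside
    ``a``, and only then the entries inside ``a/b``.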
:param path1: first path :param path2: the second path :return: negative number if ``path1`` comes first, 0 if paths are equal and a positive number if ``path2`` sorts first """ if not isinstance(path1, str): raise TypeError("'path1' must be a plain string, not %s: %r" % (type(path1), path1)) if not isinstance(path2, str): raise TypeError("'path2' must be a plain string, not %s: %r" % (type(path2), path2)) dirname1, basename1 = os.path.split(path1) key1 = (dirname1.split('/'), basename1) dirname2, basename2 = os.path.split(path2) key2 = (dirname2.split('/'), basename2) return cmp(key1, key2) def _read_dirblocks(state): """Read in the dirblocks for the given DirState object. This is tightly bound to the DirState internal representation. It should be thought of as a member function, which is only separated out so that we can re-write it in pyrex. :param state: A DirState object. :return: None """ state._state_file.seek(state._end_of_header) text = state._state_file.read() # TODO: check the crc checksums. crc_measured = zlib.crc32(text) fields = text.split('\0') # Remove the last blank entry trailing = fields.pop() if trailing != '': raise errors.DirstateCorrupt(state, 'trailing garbage: %r' % (trailing,)) # consider turning fields into a tuple. # skip the first field which is the trailing null from the header. cur = 1 # Each line now has an extra '\n' field which is not used # so we just skip over it # entry size: # 3 fields for the key # + number of fields per tree_data (5) * tree count # + newline num_present_parents = state._num_present_parents() tree_count = 1 + num_present_parents entry_size = state._fields_per_entry() expected_field_count = entry_size * state._num_entries field_count = len(fields) # this checks our adjustment, and also catches file too short. if field_count - cur != expected_field_count: raise errors.DirstateCorrupt(state, 'field count incorrect %s != %s, entry_size=%s, '\ 'num_entries=%s fields=%r' % ( field_count - cur, expected_field_count, entry_size, state._num_entries, fields)) if num_present_parents == 1: # Bind external functions to local names _int = int # We access all fields in order, so we can just iterate over # them. Grab an straight iterator over the fields. (We use an # iterator because we don't want to do a lot of additions, nor # do we want to do a lot of slicing) next = iter(fields).next # Move the iterator to the current position for x in xrange(cur): next() # The two blocks here are deliberate: the root block and the # contents-of-root block. 
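    # As a rough illustration of what gets built below (names and values are
    # invented for the example): each dirblock is a (dirname, entries) pair,
    # and each entry has the form
    #   ((dirname, basename, file_id),
    #    [(minikind, fingerprint, size, executable, packed_stat_or_revid),
    #     ... one such 5-tuple per tree ...])
    # so a state whose only versioned paths are 'a' and 'a/b' should end up
    # roughly as
    #   [('', [root_entry]), ('', [entry_for_a]), ('a', [entry_for_a_b])]
    # after _split_root_dirblock_into_contents() separates the root entry
    # from the rest of the top-level entries.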
state._dirblocks = [('', []), ('', [])] current_block = state._dirblocks[0][1] current_dirname = '' append_entry = current_block.append for count in xrange(state._num_entries): dirname = next() name = next() file_id = next() if dirname != current_dirname: # new block - different dirname current_block = [] current_dirname = dirname state._dirblocks.append((current_dirname, current_block)) append_entry = current_block.append # we know current_dirname == dirname, so re-use it to avoid # creating new strings entry = ((current_dirname, name, file_id), [(# Current Tree next(), # minikind next(), # fingerprint _int(next()), # size next() == 'y', # executable next(), # packed_stat or revision_id ), ( # Parent 1 next(), # minikind next(), # fingerprint _int(next()), # size next() == 'y', # executable next(), # packed_stat or revision_id ), ]) trailing = next() if trailing != '\n': raise ValueError("trailing garbage in dirstate: %r" % trailing) # append the entry to the current block append_entry(entry) state._split_root_dirblock_into_contents() else: fields_to_entry = state._get_fields_to_entry() entries = [fields_to_entry(fields[pos:pos+entry_size]) for pos in xrange(cur, field_count, entry_size)] state._entries_to_current_state(entries) # To convert from format 2 => format 3 # state._dirblocks = sorted(state._dirblocks, # key=lambda blk:blk[0].split('/')) # To convert from format 3 => format 2 # state._dirblocks = sorted(state._dirblocks) state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED bzr-2.7.0/bzrlib/_dirstate_helpers_pyx.h0000644000000000000000000000035011217677227016512 0ustar 00000000000000#ifndef _DIRSTATE_HELPERS_PYX_H #define _DIRSTATE_HELPERS_PYX_H /* for intptr_t */ #ifdef _MSC_VER #include #else #if defined(__SVR4) && defined(__sun) #include #else #include #endif #endif #endif bzr-2.7.0/bzrlib/_dirstate_helpers_pyx.pyx0000644000000000000000000026043511643355111017103 0ustar 00000000000000# Copyright (C) 2007-2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Helper functions for DirState. This is the python implementation for DirState functions. """ import binascii import bisect import errno import os import stat import sys from bzrlib import cache_utf8, errors, osutils from bzrlib.dirstate import DirState from bzrlib.osutils import parent_directories, pathjoin, splitpath # This is the Windows equivalent of ENOTDIR # It is defined in pywin32.winerror, but we don't want a strong dependency for # just an error code. # XXX: Perhaps we could get it from a windows header ? cdef int ERROR_PATH_NOT_FOUND ERROR_PATH_NOT_FOUND = 3 cdef int ERROR_DIRECTORY ERROR_DIRECTORY = 267 #python2.4 support, and other platform-dependent includes cdef extern from "python-compat.h": unsigned long htonl(unsigned long) # Give Pyrex some function definitions for it to understand. 
# All of these are just hints to Pyrex, so that it can try to convert python # objects into similar C objects. (such as PyInt => int). # In anything defined 'cdef extern from XXX' the real C header will be # imported, and the real definition will be used from there. So these are just # hints, and do not need to match exactly to the C definitions. cdef extern from *: ctypedef unsigned long size_t cdef extern from "_dirstate_helpers_pyx.h": ctypedef int intptr_t cdef extern from "stdlib.h": unsigned long int strtoul(char *nptr, char **endptr, int base) cdef extern from 'sys/stat.h': int S_ISDIR(int mode) int S_ISREG(int mode) # On win32, this actually comes from "python-compat.h" int S_ISLNK(int mode) int S_IXUSR # These functions allow us access to a bit of the 'bare metal' of python # objects, rather than going through the object abstraction. (For example, # PyList_Append, rather than getting the 'append' attribute of the object, and # creating a tuple, and then using PyCallObject). # Functions that return (or take) a void* are meant to grab a C PyObject*. This # differs from the Pyrex 'object'. If you declare a variable as 'object' Pyrex # will automatically Py_INCREF and Py_DECREF when appropriate. But for some # inner loops, we don't need to do that at all, as the reference only lasts for # a very short time. # Note that the C API GetItem calls borrow references, so pyrex does the wrong # thing if you declare e.g. object PyList_GetItem(object lst, int index) - you # need to manually Py_INCREF yourself. cdef extern from "Python.h": ctypedef int Py_ssize_t ctypedef struct PyObject: pass int PyList_Append(object lst, object item) except -1 void *PyList_GetItem_object_void "PyList_GET_ITEM" (object lst, int index) void *PyList_GetItem_void_void "PyList_GET_ITEM" (void * lst, int index) object PyList_GET_ITEM(object lst, Py_ssize_t index) int PyList_CheckExact(object) Py_ssize_t PyList_GET_SIZE (object p) void *PyTuple_GetItem_void_void "PyTuple_GET_ITEM" (void* tpl, int index) object PyTuple_GetItem_void_object "PyTuple_GET_ITEM" (void* tpl, int index) object PyTuple_GET_ITEM(object tpl, Py_ssize_t index) unsigned long PyInt_AsUnsignedLongMask(object number) except? -1 char *PyString_AsString(object p) char *PyString_AsString_obj "PyString_AsString" (PyObject *string) char *PyString_AS_STRING_void "PyString_AS_STRING" (void *p) int PyString_AsStringAndSize(object str, char **buffer, Py_ssize_t *length) except -1 object PyString_FromString(char *) object PyString_FromStringAndSize(char *, Py_ssize_t) int PyString_Size(object p) int PyString_GET_SIZE_void "PyString_GET_SIZE" (void *p) int PyString_CheckExact(object p) void Py_INCREF(object o) void Py_DECREF(object o) cdef extern from "string.h": int strncmp(char *s1, char *s2, int len) void *memchr(void *s, int c, size_t len) int memcmp(void *b1, void *b2, size_t len) # ??? memrchr is a GNU extension :( # void *memrchr(void *s, int c, size_t len) # cimport all of the definitions we will need to access from _static_tuple_c cimport import_static_tuple_c, StaticTuple, \ StaticTuple_New, StaticTuple_SET_ITEM import_static_tuple_c() cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise # memrchr seems to be a GNU extension, so we have to implement it ourselves cdef char *pos cdef char *start start = s pos = start + n - 1 while pos >= start: if pos[0] == c: return pos pos = pos - 1 return NULL def _py_memrchr(s, c): """Just to expose _my_memrchr for testing. 
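    For example, ``_py_memrchr('abcabc', 'b')`` should return 4 (the offset
    of the last ``'b'`` in the string), and None when the character does not
    occur at all.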
:param s: The Python string to search :param c: The character to search for :return: The offset to the last instance of 'c' in s """ cdef void *_s cdef void *found cdef int length cdef char *_c _s = PyString_AsString(s) length = PyString_Size(s) _c = PyString_AsString(c) assert PyString_Size(c) == 1,\ 'Must be a single character string, not %s' % (c,) found = _my_memrchr(_s, _c[0], length) if found == NULL: return None return found - _s cdef object safe_string_from_size(char *s, Py_ssize_t size): if size < 0: raise AssertionError( 'tried to create a string with an invalid size: %d' % (size)) return PyString_FromStringAndSize(s, size) cdef int _is_aligned(void *ptr): # cannot_raise """Is this pointer aligned to an integer size offset? :return: 1 if this pointer is aligned, 0 otherwise. """ return ((ptr) & ((sizeof(int))-1)) == 0 cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2): # cannot_raise cdef unsigned char *cur1 cdef unsigned char *cur2 cdef unsigned char *end1 cdef unsigned char *end2 cdef int *cur_int1 cdef int *cur_int2 cdef int *end_int1 cdef int *end_int2 if path1 == path2 and size1 == size2: return 0 end1 = path1+size1 end2 = path2+size2 # Use 32-bit comparisons for the matching portion of the string. # Almost all CPU's are faster at loading and comparing 32-bit integers, # than they are at 8-bit integers. # 99% of the time, these will be aligned, but in case they aren't just skip # this loop if _is_aligned(path1) and _is_aligned(path2): cur_int1 = path1 cur_int2 = path2 end_int1 = (path1 + size1 - (size1 % sizeof(int))) end_int2 = (path2 + size2 - (size2 % sizeof(int))) while cur_int1 < end_int1 and cur_int2 < end_int2: if cur_int1[0] != cur_int2[0]: break cur_int1 = cur_int1 + 1 cur_int2 = cur_int2 + 1 cur1 = cur_int1 cur2 = cur_int2 else: cur1 = path1 cur2 = path2 while cur1 < end1 and cur2 < end2: if cur1[0] == cur2[0]: # This character matches, just go to the next one cur1 = cur1 + 1 cur2 = cur2 + 1 continue # The current characters do not match if cur1[0] == c'/': return -1 # Reached the end of path1 segment first elif cur2[0] == c'/': return 1 # Reached the end of path2 segment first elif cur1[0] < cur2[0]: return -1 else: return 1 # We reached the end of at least one of the strings if cur1 < end1: return 1 # Not at the end of cur1, must be at the end of cur2 if cur2 < end2: return -1 # At the end of cur1, but not at cur2 # We reached the end of both strings return 0 def cmp_by_dirs(path1, path2): """Compare two paths directory by directory. This is equivalent to doing:: cmp(path1.split('/'), path2.split('/')) The idea is that you should compare path components separately. This differs from plain ``cmp(path1, path2)`` for paths like ``'a-b'`` and ``a/b``. "a-b" comes after "a" but would come before "a/b" lexically. :param path1: first path :param path2: second path :return: negative number if ``path1`` comes first, 0 if paths are equal, and positive number if ``path2`` sorts first """ if not PyString_CheckExact(path1): raise TypeError("'path1' must be a plain string, not %s: %r" % (type(path1), path1)) if not PyString_CheckExact(path2): raise TypeError("'path2' must be a plain string, not %s: %r" % (type(path2), path2)) return _cmp_by_dirs(PyString_AsString(path1), PyString_Size(path1), PyString_AsString(path2), PyString_Size(path2)) def _cmp_path_by_dirblock(path1, path2): """Compare two paths based on what directory they are in. 
This generates a sort order, such that all children of a directory are sorted together, and grandchildren are in the same order as the children appear. But all grandchildren come after all children. In other words, all entries in a directory are sorted together, and directorys are sorted in cmp_by_dirs order. :param path1: first path :param path2: the second path :return: negative number if ``path1`` comes first, 0 if paths are equal and a positive number if ``path2`` sorts first """ if not PyString_CheckExact(path1): raise TypeError("'path1' must be a plain string, not %s: %r" % (type(path1), path1)) if not PyString_CheckExact(path2): raise TypeError("'path2' must be a plain string, not %s: %r" % (type(path2), path2)) return _cmp_path_by_dirblock_intern(PyString_AsString(path1), PyString_Size(path1), PyString_AsString(path2), PyString_Size(path2)) cdef int _cmp_path_by_dirblock_intern(char *path1, int path1_len, char *path2, int path2_len): # cannot_raise """Compare two paths by what directory they are in. see ``_cmp_path_by_dirblock`` for details. """ cdef char *dirname1 cdef int dirname1_len cdef char *dirname2 cdef int dirname2_len cdef char *basename1 cdef int basename1_len cdef char *basename2 cdef int basename2_len cdef int cur_len cdef int cmp_val if path1_len == 0 and path2_len == 0: return 0 if path1 == path2 and path1_len == path2_len: return 0 if path1_len == 0: return -1 if path2_len == 0: return 1 basename1 = _my_memrchr(path1, c'/', path1_len) if basename1 == NULL: basename1 = path1 basename1_len = path1_len dirname1 = '' dirname1_len = 0 else: dirname1 = path1 dirname1_len = basename1 - path1 basename1 = basename1 + 1 basename1_len = path1_len - dirname1_len - 1 basename2 = _my_memrchr(path2, c'/', path2_len) if basename2 == NULL: basename2 = path2 basename2_len = path2_len dirname2 = '' dirname2_len = 0 else: dirname2 = path2 dirname2_len = basename2 - path2 basename2 = basename2 + 1 basename2_len = path2_len - dirname2_len - 1 cmp_val = _cmp_by_dirs(dirname1, dirname1_len, dirname2, dirname2_len) if cmp_val != 0: return cmp_val cur_len = basename1_len if basename2_len < basename1_len: cur_len = basename2_len cmp_val = memcmp(basename1, basename2, cur_len) if cmp_val != 0: return cmp_val if basename1_len == basename2_len: return 0 if basename1_len < basename2_len: return -1 return 1 def _bisect_path_left(paths, path): """Return the index where to insert path into paths. This uses a path-wise comparison so we get:: a a-b a=b a/b Rather than:: a a-b a/b a=b :param paths: A list of paths to search through :param path: A single path to insert :return: An offset where 'path' can be inserted. :seealso: bisect.bisect_left """ cdef int _lo cdef int _hi cdef int _mid cdef char *path_cstr cdef int path_size cdef char *cur_cstr cdef int cur_size cdef void *cur if not PyList_CheckExact(paths): raise TypeError("you must pass a python list for 'paths' not: %s %r" % (type(paths), paths)) if not PyString_CheckExact(path): raise TypeError("you must pass a string for 'path' not: %s %r" % (type(path), path)) _hi = len(paths) _lo = 0 path_cstr = PyString_AsString(path) path_size = PyString_Size(path) while _lo < _hi: _mid = (_lo + _hi) / 2 cur = PyList_GetItem_object_void(paths, _mid) cur_cstr = PyString_AS_STRING_void(cur) cur_size = PyString_GET_SIZE_void(cur) if _cmp_path_by_dirblock_intern(cur_cstr, cur_size, path_cstr, path_size) < 0: _lo = _mid + 1 else: _hi = _mid return _lo def _bisect_path_right(paths, path): """Return the index where to insert path into paths. 
This uses a path-wise comparison so we get:: a a-b a=b a/b Rather than:: a a-b a/b a=b :param paths: A list of paths to search through :param path: A single path to insert :return: An offset where 'path' can be inserted. :seealso: bisect.bisect_right """ cdef int _lo cdef int _hi cdef int _mid cdef char *path_cstr cdef int path_size cdef char *cur_cstr cdef int cur_size cdef void *cur if not PyList_CheckExact(paths): raise TypeError("you must pass a python list for 'paths' not: %s %r" % (type(paths), paths)) if not PyString_CheckExact(path): raise TypeError("you must pass a string for 'path' not: %s %r" % (type(path), path)) _hi = len(paths) _lo = 0 path_cstr = PyString_AsString(path) path_size = PyString_Size(path) while _lo < _hi: _mid = (_lo + _hi) / 2 cur = PyList_GetItem_object_void(paths, _mid) cur_cstr = PyString_AS_STRING_void(cur) cur_size = PyString_GET_SIZE_void(cur) if _cmp_path_by_dirblock_intern(path_cstr, path_size, cur_cstr, cur_size) < 0: _hi = _mid else: _lo = _mid + 1 return _lo def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache=None): """Return the index where to insert dirname into the dirblocks. The return value idx is such that all directories blocks in dirblock[:idx] have names < dirname, and all blocks in dirblock[idx:] have names >= dirname. Optional args lo (default 0) and hi (default len(dirblocks)) bound the slice of a to be searched. """ cdef int _lo cdef int _hi cdef int _mid cdef char *dirname_cstr cdef int dirname_size cdef char *cur_cstr cdef int cur_size cdef void *cur if not PyList_CheckExact(dirblocks): raise TypeError("you must pass a python list for 'dirblocks' not: %s %r" % (type(dirblocks), dirblocks)) if not PyString_CheckExact(dirname): raise TypeError("you must pass a string for dirname not: %s %r" % (type(dirname), dirname)) if hi is None: _hi = len(dirblocks) else: _hi = hi _lo = lo dirname_cstr = PyString_AsString(dirname) dirname_size = PyString_Size(dirname) while _lo < _hi: _mid = (_lo + _hi) / 2 # Grab the dirname for the current dirblock # cur = dirblocks[_mid][0] cur = PyTuple_GetItem_void_void( PyList_GetItem_object_void(dirblocks, _mid), 0) cur_cstr = PyString_AS_STRING_void(cur) cur_size = PyString_GET_SIZE_void(cur) if _cmp_by_dirs(cur_cstr, cur_size, dirname_cstr, dirname_size) < 0: _lo = _mid + 1 else: _hi = _mid return _lo cdef class Reader: """Maintain the current location, and return fields as you parse them.""" cdef object state # The DirState object cdef object text # The overall string object cdef char *text_cstr # Pointer to the beginning of text cdef int text_size # Length of text cdef char *end_cstr # End of text cdef char *cur_cstr # Pointer to the current record cdef char *next # Pointer to the end of this record def __init__(self, text, state): self.state = state self.text = text self.text_cstr = PyString_AsString(text) self.text_size = PyString_Size(text) self.end_cstr = self.text_cstr + self.text_size self.cur_cstr = self.text_cstr cdef char *get_next(self, int *size) except NULL: """Return a pointer to the start of the next field.""" cdef char *next cdef Py_ssize_t extra_len if self.cur_cstr == NULL: raise AssertionError('get_next() called when cur_str is NULL') elif self.cur_cstr >= self.end_cstr: raise AssertionError('get_next() called when there are no chars' ' left') next = self.cur_cstr self.cur_cstr = memchr(next, c'\0', self.end_cstr - next) if self.cur_cstr == NULL: extra_len = self.end_cstr - next raise errors.DirstateCorrupt(self.state, 'failed to find trailing NULL (\\0).' 
' Trailing garbage: %r' % safe_string_from_size(next, extra_len)) size[0] = self.cur_cstr - next self.cur_cstr = self.cur_cstr + 1 return next cdef object get_next_str(self): """Get the next field as a Python string.""" cdef int size cdef char *next next = self.get_next(&size) return safe_string_from_size(next, size) cdef int _init(self) except -1: """Get the pointer ready. This assumes that the dirstate header has already been read, and we already have the dirblock string loaded into memory. This just initializes our memory pointers, etc for parsing of the dirblock string. """ cdef char *first cdef int size # The first field should be an empty string left over from the Header first = self.get_next(&size) if first[0] != c'\0' and size == 0: raise AssertionError('First character should be null not: %s' % (first,)) return 0 cdef object _get_entry(self, int num_trees, void **p_current_dirname, int *new_block): """Extract the next entry. This parses the next entry based on the current location in ``self.cur_cstr``. Each entry can be considered a "row" in the total table. And each row has a fixed number of columns. It is generally broken up into "key" columns, then "current" columns, and then "parent" columns. :param num_trees: How many parent trees need to be parsed :param p_current_dirname: A pointer to the current PyString representing the directory name. We pass this in as a void * so that pyrex doesn't have to increment/decrement the PyObject reference counter for each _get_entry call. We use a pointer so that _get_entry can update it with the new value. :param new_block: This is to let the caller know that it needs to create a new directory block to store the next entry. """ cdef StaticTuple path_name_file_id_key cdef StaticTuple tmp cdef char *entry_size_cstr cdef unsigned long int entry_size cdef char* executable_cstr cdef int is_executable cdef char* dirname_cstr cdef char* trailing cdef int cur_size cdef int i cdef object minikind cdef object fingerprint cdef object info # Read the 'key' information (dirname, name, file_id) dirname_cstr = self.get_next(&cur_size) # Check to see if we have started a new directory block. # If so, then we need to create a new dirname PyString, so that it can # be used in all of the tuples. This saves time and memory, by re-using # the same object repeatedly. # Do the cheap 'length of string' check first. If the string is a # different length, then we *have* to be a different directory. if (cur_size != PyString_GET_SIZE_void(p_current_dirname[0]) or strncmp(dirname_cstr, # Extract the char* from our current dirname string. We # know it is a PyString, so we can use # PyString_AS_STRING, we use the _void version because # we are tricking Pyrex by using a void* rather than an # PyString_AS_STRING_void(p_current_dirname[0]), cur_size+1) != 0): dirname = safe_string_from_size(dirname_cstr, cur_size) p_current_dirname[0] = dirname new_block[0] = 1 else: new_block[0] = 0 # Build up the key that will be used. # By using (void *) Pyrex will automatically handle the # Py_INCREF that we need. cur_dirname = p_current_dirname[0] # Use StaticTuple_New to pre-allocate, rather than creating a regular # tuple and passing it to the StaticTuple constructor. 
# path_name_file_id_key = StaticTuple(p_current_dirname[0], # self.get_next_str(), # self.get_next_str(), # ) tmp = StaticTuple_New(3) Py_INCREF(cur_dirname); StaticTuple_SET_ITEM(tmp, 0, cur_dirname) cur_basename = self.get_next_str() cur_file_id = self.get_next_str() Py_INCREF(cur_basename); StaticTuple_SET_ITEM(tmp, 1, cur_basename) Py_INCREF(cur_file_id); StaticTuple_SET_ITEM(tmp, 2, cur_file_id) path_name_file_id_key = tmp # Parse all of the per-tree information. current has the information in # the same location as parent trees. The only difference is that 'info' # is a 'packed_stat' for current, while it is a 'revision_id' for # parent trees. # minikind, fingerprint, and info will be returned as regular python # strings # entry_size and is_executable will be parsed into a python Long and # python Boolean, respectively. # TODO: jam 20070718 Consider changin the entry_size conversion to # prefer python Int when possible. They are generally faster to # work with, and it will be rare that we have a file >2GB. # Especially since this code is pretty much fixed at a max of # 4GB. trees = [] for i from 0 <= i < num_trees: minikind = self.get_next_str() fingerprint = self.get_next_str() entry_size_cstr = self.get_next(&cur_size) entry_size = strtoul(entry_size_cstr, NULL, 10) executable_cstr = self.get_next(&cur_size) is_executable = (executable_cstr[0] == c'y') info = self.get_next_str() # TODO: If we want to use StaticTuple_New here we need to be pretty # careful. We are relying on a bit of Pyrex # automatic-conversion from 'int' to PyInt, and that doesn't # play well with the StaticTuple_SET_ITEM macro. # Timing doesn't (yet) show a worthwile improvement in speed # versus complexity and maintainability. # tmp = StaticTuple_New(5) # Py_INCREF(minikind); StaticTuple_SET_ITEM(tmp, 0, minikind) # Py_INCREF(fingerprint); StaticTuple_SET_ITEM(tmp, 1, fingerprint) # Py_INCREF(entry_size); StaticTuple_SET_ITEM(tmp, 2, entry_size) # Py_INCREF(is_executable); StaticTuple_SET_ITEM(tmp, 3, is_executable) # Py_INCREF(info); StaticTuple_SET_ITEM(tmp, 4, info) # PyList_Append(trees, tmp) PyList_Append(trees, StaticTuple( minikind, # minikind fingerprint, # fingerprint entry_size, # size is_executable,# executable info, # packed_stat or revision_id )) # The returned tuple is (key, [trees]) ret = (path_name_file_id_key, trees) # Ignore the trailing newline, but assert that it does exist, this # ensures that we always finish parsing a line on an end-of-entry # marker. trailing = self.get_next(&cur_size) if cur_size != 1 or trailing[0] != c'\n': raise errors.DirstateCorrupt(self.state, 'Bad parse, we expected to end on \\n, not: %d %s: %s' % (cur_size, safe_string_from_size(trailing, cur_size), ret)) return ret def _parse_dirblocks(self): """Parse all dirblocks in the state file.""" cdef int num_trees cdef object current_block cdef object entry cdef void * current_dirname cdef int new_block cdef int expected_entry_count cdef int entry_count num_trees = self.state._num_present_parents() + 1 expected_entry_count = self.state._num_entries # Ignore the first record self._init() current_block = [] dirblocks = [('', current_block), ('', [])] self.state._dirblocks = dirblocks obj = '' current_dirname = obj new_block = 0 entry_count = 0 # TODO: jam 2007-05-07 Consider pre-allocating some space for the # members, and then growing and shrinking from there. If most # directories have close to 10 entries in them, it would save a # few mallocs if we default our list size to something # reasonable. 
Or we could malloc it to something large (100 or # so), and then truncate. That would give us a malloc + realloc, # rather than lots of reallocs. while self.cur_cstr < self.end_cstr: entry = self._get_entry(num_trees, ¤t_dirname, &new_block) if new_block: # new block - different dirname current_block = [] PyList_Append(dirblocks, (current_dirname, current_block)) PyList_Append(current_block, entry) entry_count = entry_count + 1 if entry_count != expected_entry_count: raise errors.DirstateCorrupt(self.state, 'We read the wrong number of entries.' ' We expected to read %s, but read %s' % (expected_entry_count, entry_count)) self.state._split_root_dirblock_into_contents() def _read_dirblocks(state): """Read in the dirblocks for the given DirState object. This is tightly bound to the DirState internal representation. It should be thought of as a member function, which is only separated out so that we can re-write it in pyrex. :param state: A DirState object. :return: None :postcondition: The dirblocks will be loaded into the appropriate fields in the DirState object. """ state._state_file.seek(state._end_of_header) text = state._state_file.read() # TODO: check the crc checksums. crc_measured = zlib.crc32(text) reader = Reader(text, state) reader._parse_dirblocks() state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED cdef int minikind_from_mode(int mode): # cannot_raise # in order of frequency: if S_ISREG(mode): return c"f" if S_ISDIR(mode): return c"d" if S_ISLNK(mode): return c"l" return 0 _encode = binascii.b2a_base64 cdef _pack_stat(stat_value): """return a string representing the stat value's key fields. :param stat_value: A stat oject with st_size, st_mtime, st_ctime, st_dev, st_ino and st_mode fields. """ cdef char result[6*4] # 6 long ints cdef int *aliased aliased = result aliased[0] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_size)) # mtime and ctime will often be floats but get converted to PyInt within aliased[1] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_mtime)) aliased[2] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_ctime)) aliased[3] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_dev)) aliased[4] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_ino)) aliased[5] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_mode)) packed = PyString_FromStringAndSize(result, 6*4) return _encode(packed)[:-1] def pack_stat(stat_value): """Convert stat value into a packed representation quickly with pyrex""" return _pack_stat(stat_value) def update_entry(self, entry, abspath, stat_value): """Update the entry based on what is actually on disk. This function only calculates the sha if it needs to - if the entry is uncachable, or clearly different to the first parent's entry, no sha is calculated, and None is returned. :param entry: This is the dirblock entry for the file in question. :param abspath: The path on disk for this file. :param stat_value: (optional) if we already have done a stat on the file, re-use it. :return: None, or The sha1 hexdigest of the file (40 bytes) or link target of a symlink. """ return _update_entry(self, entry, abspath, stat_value) cdef _update_entry(self, entry, abspath, stat_value): """Update the entry based on what is actually on disk. This function only calculates the sha if it needs to - if the entry is uncachable, or clearly different to the first parent's entry, no sha is calculated, and None is returned. :param self: The dirstate object this is operating on. :param entry: This is the dirblock entry for the file in question. 
:param abspath: The path on disk for this file. :param stat_value: The stat value done on the path. :return: None, or The sha1 hexdigest of the file (40 bytes) or link target of a symlink. """ # TODO - require pyrex 0.9.8, then use a pyd file to define access to the # _st mode of the compiled stat objects. cdef int minikind, saved_minikind cdef void * details cdef int worth_saving minikind = minikind_from_mode(stat_value.st_mode) if 0 == minikind: return None packed_stat = _pack_stat(stat_value) details = PyList_GetItem_void_void(PyTuple_GetItem_void_void(entry, 1), 0) saved_minikind = PyString_AsString_obj(PyTuple_GetItem_void_void(details, 0))[0] if minikind == c'd' and saved_minikind == c't': minikind = c't' saved_link_or_sha1 = PyTuple_GetItem_void_object(details, 1) saved_file_size = PyTuple_GetItem_void_object(details, 2) saved_executable = PyTuple_GetItem_void_object(details, 3) saved_packed_stat = PyTuple_GetItem_void_object(details, 4) # Deal with pyrex decrefing the objects Py_INCREF(saved_link_or_sha1) Py_INCREF(saved_file_size) Py_INCREF(saved_executable) Py_INCREF(saved_packed_stat) #(saved_minikind, saved_link_or_sha1, saved_file_size, # saved_executable, saved_packed_stat) = entry[1][0] if (minikind == saved_minikind and packed_stat == saved_packed_stat): # The stat hasn't changed since we saved, so we can re-use the # saved sha hash. if minikind == c'd': return None # size should also be in packed_stat if saved_file_size == stat_value.st_size: return saved_link_or_sha1 # If we have gotten this far, that means that we need to actually # process this entry. link_or_sha1 = None worth_saving = 1 if minikind == c'f': executable = self._is_executable(stat_value.st_mode, saved_executable) if self._cutoff_time is None: self._sha_cutoff_time() if (stat_value.st_mtime < self._cutoff_time and stat_value.st_ctime < self._cutoff_time and len(entry[1]) > 1 and entry[1][1][0] != 'a'): # Could check for size changes for further optimised # avoidance of sha1's. However the most prominent case of # over-shaing is during initial add, which this catches. link_or_sha1 = self._sha1_file(abspath) entry[1][0] = ('f', link_or_sha1, stat_value.st_size, executable, packed_stat) else: # This file is not worth caching the sha1. Either it is too new, or # it is newly added. Regardless, the only things we are changing # are derived from the stat, and so are not worth caching. So we do # *not* set the IN_MEMORY_MODIFIED flag. (But we'll save the # updated values if there is *other* data worth saving.) entry[1][0] = ('f', '', stat_value.st_size, executable, DirState.NULLSTAT) worth_saving = 0 elif minikind == c'd': entry[1][0] = ('d', '', 0, False, packed_stat) if saved_minikind != c'd': # This changed from something into a directory. Make sure we # have a directory block for it. This doesn't happen very # often, so this doesn't have to be super fast. block_index, entry_index, dir_present, file_present = \ self._get_block_entry_index(entry[0][0], entry[0][1], 0) self._ensure_block(block_index, entry_index, pathjoin(entry[0][0], entry[0][1])) else: # Any changes are derived trivially from the stat object, not worth # re-writing a dirstate for just this worth_saving = 0 elif minikind == c'l': if saved_minikind == c'l': # If the object hasn't changed kind, it isn't worth saving the # dirstate just for a symlink. The default is 'fast symlinks' which # save the target in the inode entry, rather than separately. So to # stat, we've already read everything off disk. 
worth_saving = 0 link_or_sha1 = self._read_link(abspath, saved_link_or_sha1) if self._cutoff_time is None: self._sha_cutoff_time() if (stat_value.st_mtime < self._cutoff_time and stat_value.st_ctime < self._cutoff_time): entry[1][0] = ('l', link_or_sha1, stat_value.st_size, False, packed_stat) else: entry[1][0] = ('l', '', stat_value.st_size, False, DirState.NULLSTAT) if worth_saving: # Note, even though _mark_modified will only set # IN_MEMORY_HASH_MODIFIED, it still isn't worth self._mark_modified([entry]) return link_or_sha1 # TODO: Do we want to worry about exceptions here? cdef char _minikind_from_string(object string) except? -1: """Convert a python string to a char.""" return PyString_AsString(string)[0] cdef object _kind_absent cdef object _kind_file cdef object _kind_directory cdef object _kind_symlink cdef object _kind_relocated cdef object _kind_tree_reference _kind_absent = "absent" _kind_file = "file" _kind_directory = "directory" _kind_symlink = "symlink" _kind_relocated = "relocated" _kind_tree_reference = "tree-reference" cdef object _minikind_to_kind(char minikind): """Create a string kind for minikind.""" cdef char _minikind[1] if minikind == c'f': return _kind_file elif minikind == c'd': return _kind_directory elif minikind == c'a': return _kind_absent elif minikind == c'r': return _kind_relocated elif minikind == c'l': return _kind_symlink elif minikind == c't': return _kind_tree_reference _minikind[0] = minikind raise KeyError(PyString_FromStringAndSize(_minikind, 1)) cdef int _versioned_minikind(char minikind): # cannot_raise """Return non-zero if minikind is in fltd""" return (minikind == c'f' or minikind == c'd' or minikind == c'l' or minikind == c't') cdef class ProcessEntryC: cdef int doing_consistency_expansion cdef object old_dirname_to_file_id # dict cdef object new_dirname_to_file_id # dict cdef object last_source_parent cdef object last_target_parent cdef int include_unchanged cdef int partial cdef object use_filesystem_for_exec cdef object utf8_decode cdef readonly object searched_specific_files cdef readonly object searched_exact_paths cdef object search_specific_files # The parents up to the root of the paths we are searching. # After all normal paths are returned, these specific items are returned. cdef object search_specific_file_parents cdef object state # Current iteration variables: cdef object current_root cdef object current_root_unicode cdef object root_entries cdef int root_entries_pos, root_entries_len cdef object root_abspath cdef int source_index, target_index cdef int want_unversioned cdef object tree cdef object dir_iterator cdef int block_index cdef object current_block cdef int current_block_pos cdef object current_block_list cdef object current_dir_info cdef object current_dir_list cdef object _pending_consistent_entries # list cdef int path_index cdef object root_dir_info cdef object bisect_left cdef object pathjoin cdef object fstat # A set of the ids we've output when doing partial output. cdef object seen_ids cdef object sha_file def __init__(self, include_unchanged, use_filesystem_for_exec, search_specific_files, state, source_index, target_index, want_unversioned, tree): self.doing_consistency_expansion = 0 self.old_dirname_to_file_id = {} self.new_dirname_to_file_id = {} # Are we doing a partial iter_changes? self.partial = set(['']).__ne__(search_specific_files) # Using a list so that we can access the values and change them in # nested scope. 
Each one is [path, file_id, entry] self.last_source_parent = [None, None] self.last_target_parent = [None, None] if include_unchanged is None: self.include_unchanged = False else: self.include_unchanged = int(include_unchanged) self.use_filesystem_for_exec = use_filesystem_for_exec self.utf8_decode = cache_utf8._utf8_decode # for all search_indexs in each path at or under each element of # search_specific_files, if the detail is relocated: add the id, and # add the relocated path as one to search if its not searched already. # If the detail is not relocated, add the id. self.searched_specific_files = set() # When we search exact paths without expanding downwards, we record # that here. self.searched_exact_paths = set() self.search_specific_files = search_specific_files # The parents up to the root of the paths we are searching. # After all normal paths are returned, these specific items are returned. self.search_specific_file_parents = set() # The ids we've sent out in the delta. self.seen_ids = set() self.state = state self.current_root = None self.current_root_unicode = None self.root_entries = None self.root_entries_pos = 0 self.root_entries_len = 0 self.root_abspath = None if source_index is None: self.source_index = -1 else: self.source_index = source_index self.target_index = target_index self.want_unversioned = want_unversioned self.tree = tree self.dir_iterator = None self.block_index = -1 self.current_block = None self.current_block_list = None self.current_block_pos = -1 self.current_dir_info = None self.current_dir_list = None self._pending_consistent_entries = [] self.path_index = 0 self.root_dir_info = None self.bisect_left = bisect.bisect_left self.pathjoin = osutils.pathjoin self.fstat = os.fstat self.sha_file = osutils.sha_file if target_index != 0: # A lot of code in here depends on target_index == 0 raise errors.BzrError('unsupported target index') cdef _process_entry(self, entry, path_info): """Compare an entry and real disk to generate delta information. :param path_info: top_relpath, basename, kind, lstat, abspath for the path of entry. If None, then the path is considered absent in the target (Perhaps we should pass in a concrete entry for this ?) Basename is returned as a utf8 string because we expect this tuple will be ignored, and don't want to take the time to decode. :return: (iter_changes_result, changed). If the entry has not been handled then changed is None. Otherwise it is False if no content or metadata changes have occured, and True if any content or metadata change has occurred. If self.include_unchanged is True then if changed is not None, iter_changes_result will always be a result tuple. Otherwise, iter_changes_result is None unless changed is True. 
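        When a result tuple is produced it has the shape built by the return
        statements below, i.e.::

            (file_id,
             (old_path, new_path),
             content_change,
             (old_versioned, new_versioned),
             (old_parent_id, new_parent_id),
             (old_name, new_name),
             (old_kind, new_kind),
             (old_executable, new_executable))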
""" cdef char target_minikind cdef char source_minikind cdef object file_id cdef int content_change cdef object details_list file_id = None details_list = entry[1] if -1 == self.source_index: source_details = DirState.NULL_PARENT_DETAILS else: source_details = details_list[self.source_index] target_details = details_list[self.target_index] target_minikind = _minikind_from_string(target_details[0]) if path_info is not None and _versioned_minikind(target_minikind): if self.target_index != 0: raise AssertionError("Unsupported target index %d" % self.target_index) link_or_sha1 = _update_entry(self.state, entry, path_info[4], path_info[3]) # The entry may have been modified by update_entry target_details = details_list[self.target_index] target_minikind = _minikind_from_string(target_details[0]) else: link_or_sha1 = None # the rest of this function is 0.3 seconds on 50K paths, or # 0.000006 seconds per call. source_minikind = _minikind_from_string(source_details[0]) if ((_versioned_minikind(source_minikind) or source_minikind == c'r') and _versioned_minikind(target_minikind)): # claimed content in both: diff # r | fdlt | | add source to search, add id path move and perform # | | | diff check on source-target # r | fdlt | a | dangling file that was present in the basis. # | | | ??? if source_minikind != c'r': old_dirname = entry[0][0] old_basename = entry[0][1] old_path = path = None else: # add the source to the search path to find any children it # has. TODO ? : only add if it is a container ? if (not self.doing_consistency_expansion and not osutils.is_inside_any(self.searched_specific_files, source_details[1])): self.search_specific_files.add(source_details[1]) # expanding from a user requested path, parent expansion # for delta consistency happens later. # generate the old path; this is needed for stating later # as well. old_path = source_details[1] old_dirname, old_basename = os.path.split(old_path) path = self.pathjoin(entry[0][0], entry[0][1]) old_entry = self.state._get_entry(self.source_index, path_utf8=old_path) # update the source details variable to be the real # location. if old_entry == (None, None): raise errors.CorruptDirstate(self.state._filename, "entry '%s/%s' is considered renamed from %r" " but source does not exist\n" "entry: %s" % (entry[0][0], entry[0][1], old_path, entry)) source_details = old_entry[1][self.source_index] source_minikind = _minikind_from_string(source_details[0]) if path_info is None: # the file is missing on disk, show as removed. content_change = 1 target_kind = None target_exec = False else: # source and target are both versioned and disk file is present. target_kind = path_info[2] if target_kind == 'directory': if path is None: old_path = path = self.pathjoin(old_dirname, old_basename) file_id = entry[0][2] self.new_dirname_to_file_id[path] = file_id if source_minikind != c'd': content_change = 1 else: # directories have no fingerprint content_change = 0 target_exec = False elif target_kind == 'file': if source_minikind != c'f': content_change = 1 else: # Check the sha. 
We can't just rely on the size as # content filtering may mean differ sizes actually # map to the same content if link_or_sha1 is None: # Stat cache miss: statvalue, link_or_sha1 = \ self.state._sha1_provider.stat_and_sha1( path_info[4]) self.state._observed_sha1(entry, link_or_sha1, statvalue) content_change = (link_or_sha1 != source_details[1]) # Target details is updated at update_entry time if self.use_filesystem_for_exec: # We don't need S_ISREG here, because we are sure # we are dealing with a file. target_exec = bool(S_IXUSR & path_info[3].st_mode) else: target_exec = target_details[3] elif target_kind == 'symlink': if source_minikind != c'l': content_change = 1 else: content_change = (link_or_sha1 != source_details[1]) target_exec = False elif target_kind == 'tree-reference': if source_minikind != c't': content_change = 1 else: content_change = 0 target_exec = False else: if path is None: path = self.pathjoin(old_dirname, old_basename) raise errors.BadFileKindError(path, path_info[2]) if source_minikind == c'd': if path is None: old_path = path = self.pathjoin(old_dirname, old_basename) if file_id is None: file_id = entry[0][2] self.old_dirname_to_file_id[old_path] = file_id # parent id is the entry for the path in the target tree if old_basename and old_dirname == self.last_source_parent[0]: # use a cached hit for non-root source entries. source_parent_id = self.last_source_parent[1] else: try: source_parent_id = self.old_dirname_to_file_id[old_dirname] except KeyError, _: source_parent_entry = self.state._get_entry(self.source_index, path_utf8=old_dirname) source_parent_id = source_parent_entry[0][2] if source_parent_id == entry[0][2]: # This is the root, so the parent is None source_parent_id = None else: self.last_source_parent[0] = old_dirname self.last_source_parent[1] = source_parent_id new_dirname = entry[0][0] if entry[0][1] and new_dirname == self.last_target_parent[0]: # use a cached hit for non-root target entries. target_parent_id = self.last_target_parent[1] else: try: target_parent_id = self.new_dirname_to_file_id[new_dirname] except KeyError, _: # TODO: We don't always need to do the lookup, because the # parent entry will be the same as the source entry. 
target_parent_entry = self.state._get_entry(self.target_index, path_utf8=new_dirname) if target_parent_entry == (None, None): raise AssertionError( "Could not find target parent in wt: %s\nparent of: %s" % (new_dirname, entry)) target_parent_id = target_parent_entry[0][2] if target_parent_id == entry[0][2]: # This is the root, so the parent is None target_parent_id = None else: self.last_target_parent[0] = new_dirname self.last_target_parent[1] = target_parent_id source_exec = source_details[3] changed = (content_change or source_parent_id != target_parent_id or old_basename != entry[0][1] or source_exec != target_exec ) if not changed and not self.include_unchanged: return None, False else: if old_path is None: path = self.pathjoin(old_dirname, old_basename) old_path = path old_path_u = self.utf8_decode(old_path)[0] path_u = old_path_u else: old_path_u = self.utf8_decode(old_path)[0] if old_path == path: path_u = old_path_u else: path_u = self.utf8_decode(path)[0] source_kind = _minikind_to_kind(source_minikind) return (entry[0][2], (old_path_u, path_u), content_change, (True, True), (source_parent_id, target_parent_id), (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]), (source_kind, target_kind), (source_exec, target_exec)), changed elif source_minikind == c'a' and _versioned_minikind(target_minikind): # looks like a new file path = self.pathjoin(entry[0][0], entry[0][1]) # parent id is the entry for the path in the target tree # TODO: these are the same for an entire directory: cache em. parent_entry = self.state._get_entry(self.target_index, path_utf8=entry[0][0]) if parent_entry is None: raise errors.DirstateCorrupt(self.state, "We could not find the parent entry in index %d" " for the entry: %s" % (self.target_index, entry[0])) parent_id = parent_entry[0][2] if parent_id == entry[0][2]: parent_id = None if path_info is not None: # Present on disk: if self.use_filesystem_for_exec: # We need S_ISREG here, because we aren't sure if this # is a file or not. target_exec = bool( S_ISREG(path_info[3].st_mode) and S_IXUSR & path_info[3].st_mode) else: target_exec = target_details[3] return (entry[0][2], (None, self.utf8_decode(path)[0]), True, (False, True), (None, parent_id), (None, self.utf8_decode(entry[0][1])[0]), (None, path_info[2]), (None, target_exec)), True else: # Its a missing file, report it as such. return (entry[0][2], (None, self.utf8_decode(path)[0]), False, (False, True), (None, parent_id), (None, self.utf8_decode(entry[0][1])[0]), (None, None), (None, False)), True elif _versioned_minikind(source_minikind) and target_minikind == c'a': # unversioned, possibly, or possibly not deleted: we dont care. # if its still on disk, *and* theres no other entry at this # path [we dont know this in this routine at the moment - # perhaps we should change this - then it would be an unknown. old_path = self.pathjoin(entry[0][0], entry[0][1]) # parent id is the entry for the path in the target tree parent_id = self.state._get_entry(self.source_index, path_utf8=entry[0][0])[0][2] if parent_id == entry[0][2]: parent_id = None return (entry[0][2], (self.utf8_decode(old_path)[0], None), True, (True, False), (parent_id, None), (self.utf8_decode(entry[0][1])[0], None), (_minikind_to_kind(source_minikind), None), (source_details[3], None)), True elif _versioned_minikind(source_minikind) and target_minikind == c'r': # a rename; could be a true rename, or a rename inherited from # a renamed parent. TODO: handle this efficiently. 
Its not # common case to rename dirs though, so a correct but slow # implementation will do. if (not self.doing_consistency_expansion and not osutils.is_inside_any(self.searched_specific_files, target_details[1])): self.search_specific_files.add(target_details[1]) # We don't expand the specific files parents list here as # the path is absent in target and won't create a delta with # missing parent. elif ((source_minikind == c'r' or source_minikind == c'a') and (target_minikind == c'r' or target_minikind == c'a')): # neither of the selected trees contain this path, # so skip over it. This is not currently directly tested, but # is indirectly via test_too_much.TestCommands.test_conflicts. pass else: raise AssertionError("don't know how to compare " "source_minikind=%r, target_minikind=%r" % (source_minikind, target_minikind)) ## import pdb;pdb.set_trace() return None, None def __iter__(self): return self def iter_changes(self): return self cdef int _gather_result_for_consistency(self, result) except -1: """Check a result we will yield to make sure we are consistent later. This gathers result's parents into a set to output later. :param result: A result tuple. """ if not self.partial or not result[0]: return 0 self.seen_ids.add(result[0]) new_path = result[1][1] if new_path: # Not the root and not a delete: queue up the parents of the path. self.search_specific_file_parents.update( osutils.parent_directories(new_path.encode('utf8'))) # Add the root directory which parent_directories does not # provide. self.search_specific_file_parents.add('') return 0 cdef int _update_current_block(self) except -1: if (self.block_index < len(self.state._dirblocks) and osutils.is_inside(self.current_root, self.state._dirblocks[self.block_index][0])): self.current_block = self.state._dirblocks[self.block_index] self.current_block_list = self.current_block[1] self.current_block_pos = 0 else: self.current_block = None self.current_block_list = None return 0 def __next__(self): # Simple thunk to allow tail recursion without pyrex confusion return self._iter_next() cdef _iter_next(self): """Iterate over the changes.""" # This function single steps through an iterator. As such while loops # are often exited by 'return' - the code is structured so that the # next call into the function will return to the same while loop. Note # that all flow control needed to re-reach that step is reexecuted, # which can be a performance problem. It has not yet been tuned to # minimise this; a state machine is probably the simplest restructuring # to both minimise this overhead and make the code considerably more # understandable. # sketch: # compare source_index and target_index at or under each element of search_specific_files. # follow the following comparison table. Note that we only want to do diff operations when # the target is fdl because thats when the walkdirs logic will have exposed the pathinfo # for the target. # cases: # # Source | Target | disk | action # r | fdlt | | add source to search, add id path move and perform # | | | diff check on source-target # r | fdlt | a | dangling file that was present in the basis. # | | | ??? # r | a | | add source to search # r | a | a | # r | r | | this path is present in a non-examined tree, skip. # r | r | a | this path is present in a non-examined tree, skip. 
# a | fdlt | | add new id # a | fdlt | a | dangling locally added file, skip # a | a | | not present in either tree, skip # a | a | a | not present in any tree, skip # a | r | | not present in either tree at this path, skip as it # | | | may not be selected by the users list of paths. # a | r | a | not present in either tree at this path, skip as it # | | | may not be selected by the users list of paths. # fdlt | fdlt | | content in both: diff them # fdlt | fdlt | a | deleted locally, but not unversioned - show as deleted ? # fdlt | a | | unversioned: output deleted id for now # fdlt | a | a | unversioned and deleted: output deleted id # fdlt | r | | relocated in this tree, so add target to search. # | | | Dont diff, we will see an r,fd; pair when we reach # | | | this id at the other path. # fdlt | r | a | relocated in this tree, so add target to search. # | | | Dont diff, we will see an r,fd; pair when we reach # | | | this id at the other path. # TODO: jam 20070516 - Avoid the _get_entry lookup overhead by # keeping a cache of directories that we have seen. cdef object current_dirname, current_blockname cdef char * current_dirname_c, * current_blockname_c cdef int advance_entry, advance_path cdef int path_handled searched_specific_files = self.searched_specific_files # Are we walking a root? while self.root_entries_pos < self.root_entries_len: entry = self.root_entries[self.root_entries_pos] self.root_entries_pos = self.root_entries_pos + 1 result, changed = self._process_entry(entry, self.root_dir_info) if changed is not None: if changed: self._gather_result_for_consistency(result) if changed or self.include_unchanged: return result # Have we finished the prior root, or never started one ? if self.current_root is None: # TODO: the pending list should be lexically sorted? the # interface doesn't require it. try: self.current_root = self.search_specific_files.pop() except KeyError, _: raise StopIteration() self.searched_specific_files.add(self.current_root) # process the entries for this containing directory: the rest will be # found by their parents recursively. self.root_entries = self.state._entries_for_path(self.current_root) self.root_entries_len = len(self.root_entries) self.current_root_unicode = self.current_root.decode('utf8') self.root_abspath = self.tree.abspath(self.current_root_unicode) try: root_stat = os.lstat(self.root_abspath) except OSError, e: if e.errno == errno.ENOENT: # the path does not exist: let _process_entry know that. self.root_dir_info = None else: # some other random error: hand it up. raise else: self.root_dir_info = ('', self.current_root, osutils.file_kind_from_stat_mode(root_stat.st_mode), root_stat, self.root_abspath) if self.root_dir_info[2] == 'directory': if self.tree._directory_is_tree_reference( self.current_root_unicode): self.root_dir_info = self.root_dir_info[:2] + \ ('tree-reference',) + self.root_dir_info[3:] if not self.root_entries and not self.root_dir_info: # this specified path is not present at all, skip it. # (tail recursion, can do a loop once the full structure is # known). return self._iter_next() path_handled = 0 self.root_entries_pos = 0 # XXX Clarity: This loop is duplicated a out the self.current_root # is None guard above: if we return from it, it completes there # (and the following if block cannot trigger because # path_handled must be true, so the if block is not # duplicated. 
while self.root_entries_pos < self.root_entries_len: entry = self.root_entries[self.root_entries_pos] self.root_entries_pos = self.root_entries_pos + 1 result, changed = self._process_entry(entry, self.root_dir_info) if changed is not None: path_handled = -1 if changed: self._gather_result_for_consistency(result) if changed or self.include_unchanged: return result # handle unversioned specified paths: if self.want_unversioned and not path_handled and self.root_dir_info: new_executable = bool( stat.S_ISREG(self.root_dir_info[3].st_mode) and stat.S_IEXEC & self.root_dir_info[3].st_mode) return (None, (None, self.current_root_unicode), True, (False, False), (None, None), (None, splitpath(self.current_root_unicode)[-1]), (None, self.root_dir_info[2]), (None, new_executable) ) # If we reach here, the outer flow continues, which enters into the # per-root setup logic. if (self.current_dir_info is None and self.current_block is None and not self.doing_consistency_expansion): # setup iteration of this root: self.current_dir_list = None if self.root_dir_info and self.root_dir_info[2] == 'tree-reference': self.current_dir_info = None else: self.dir_iterator = osutils._walkdirs_utf8(self.root_abspath, prefix=self.current_root) self.path_index = 0 try: self.current_dir_info = self.dir_iterator.next() self.current_dir_list = self.current_dir_info[1] except OSError, e: # there may be directories in the inventory even though # this path is not a file on disk: so mark it as end of # iterator if e.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL): self.current_dir_info = None elif sys.platform == 'win32': # on win32, python2.4 has e.errno == ERROR_DIRECTORY, but # python 2.5 has e.errno == EINVAL, # and e.winerror == ERROR_DIRECTORY try: e_winerror = e.winerror except AttributeError, _: e_winerror = None win_errors = (ERROR_DIRECTORY, ERROR_PATH_NOT_FOUND) if (e.errno in win_errors or e_winerror in win_errors): self.current_dir_info = None else: # Will this really raise the right exception ? raise else: raise else: if self.current_dir_info[0][0] == '': # remove .bzr from iteration bzr_index = self.bisect_left(self.current_dir_list, ('.bzr',)) if self.current_dir_list[bzr_index][0] != '.bzr': raise AssertionError() del self.current_dir_list[bzr_index] initial_key = (self.current_root, '', '') self.block_index, _ = self.state._find_block_index_from_key(initial_key) if self.block_index == 0: # we have processed the total root already, but because the # initial key matched it we should skip it here. self.block_index = self.block_index + 1 self._update_current_block() # walk until both the directory listing and the versioned metadata # are exhausted. while (self.current_dir_info is not None or self.current_block is not None): # Uncommon case - a missing directory or an unversioned directory: if (self.current_dir_info and self.current_block and self.current_dir_info[0][0] != self.current_block[0]): # Work around pyrex broken heuristic - current_dirname has # the same scope as current_dirname_c current_dirname = self.current_dir_info[0][0] current_dirname_c = PyString_AS_STRING_void( current_dirname) current_blockname = self.current_block[0] current_blockname_c = PyString_AS_STRING_void( current_blockname) # In the python generator we evaluate this if block once per # dir+block; because we reenter in the pyrex version its being # evaluated once per path: we could cache the result before # doing the while loop and probably save time. 
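# Rough pure-Python equivalent of the _cmp_by_dirs() comparison used just
# below (an illustrative sketch, not the compiled implementation): paths are
# compared one path section at a time, so all children of a directory sort
# together.
def _cmp_by_dirs_sketch(path1, path2):
    return cmp(path1.split('/'), path2.split('/'))

# For example 'a-b' sorts after 'a/b' here, although a plain string
# comparison would put 'a-b' first:
#   _cmp_by_dirs_sketch('a/b', 'a-b') < 0  while  cmp('a/b', 'a-b') > 0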
if _cmp_by_dirs(current_dirname_c, PyString_Size(current_dirname), current_blockname_c, PyString_Size(current_blockname)) < 0: # filesystem data refers to paths not covered by the # dirblock. this has two possibilities: # A) it is versioned but empty, so there is no block for it # B) it is not versioned. # if (A) then we need to recurse into it to check for # new unknown files or directories. # if (B) then we should ignore it, because we don't # recurse into unknown directories. # We are doing a loop while self.path_index < len(self.current_dir_list): current_path_info = self.current_dir_list[self.path_index] # dont descend into this unversioned path if it is # a dir if current_path_info[2] in ('directory', 'tree-reference'): del self.current_dir_list[self.path_index] self.path_index = self.path_index - 1 self.path_index = self.path_index + 1 if self.want_unversioned: if current_path_info[2] == 'directory': if self.tree._directory_is_tree_reference( self.utf8_decode(current_path_info[0])[0]): current_path_info = current_path_info[:2] + \ ('tree-reference',) + current_path_info[3:] new_executable = bool( stat.S_ISREG(current_path_info[3].st_mode) and stat.S_IEXEC & current_path_info[3].st_mode) return (None, (None, self.utf8_decode(current_path_info[0])[0]), True, (False, False), (None, None), (None, self.utf8_decode(current_path_info[1])[0]), (None, current_path_info[2]), (None, new_executable)) # This dir info has been handled, go to the next self.path_index = 0 self.current_dir_list = None try: self.current_dir_info = self.dir_iterator.next() self.current_dir_list = self.current_dir_info[1] except StopIteration, _: self.current_dir_info = None else: #(dircmp > 0) # We have a dirblock entry for this location, but there # is no filesystem path for this. This is most likely # because a directory was removed from the disk. # We don't have to report the missing directory, # because that should have already been handled, but we # need to handle all of the files that are contained # within. while self.current_block_pos < len(self.current_block_list): current_entry = self.current_block_list[self.current_block_pos] self.current_block_pos = self.current_block_pos + 1 # entry referring to file not present on disk. # advance the entry only, after processing. result, changed = self._process_entry(current_entry, None) if changed is not None: if changed: self._gather_result_for_consistency(result) if changed or self.include_unchanged: return result self.block_index = self.block_index + 1 self._update_current_block() continue # next loop-on-block/dir result = self._loop_one_block() if result is not None: return result if len(self.search_specific_files): # More supplied paths to process self.current_root = None return self._iter_next() # Start expanding more conservatively, adding paths the user may not # have intended but required for consistent deltas. 
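# Illustrative sketch of the expansion that follows, assuming only the
# documented behaviour of osutils.parent_directories(): for each path we are
# going to report, its parent directories are queued as well, plus the root
# (which parent_directories() does not include), so the resulting delta never
# refers to a parent the caller did not explicitly ask for.
from bzrlib import osutils

needed_parents = set(osutils.parent_directories('dir/subdir/file'))
needed_parents.add('')
# needed_parents should now be set(['', 'dir', 'dir/subdir'])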
self.doing_consistency_expansion = 1 if not self._pending_consistent_entries: self._pending_consistent_entries = self._next_consistent_entries() while self._pending_consistent_entries: result, changed = self._pending_consistent_entries.pop() if changed is not None: return result raise StopIteration() cdef object _maybe_tree_ref(self, current_path_info): if self.tree._directory_is_tree_reference( self.utf8_decode(current_path_info[0])[0]): return current_path_info[:2] + \ ('tree-reference',) + current_path_info[3:] else: return current_path_info cdef object _loop_one_block(self): # current_dir_info and current_block refer to the same directory - # this is the common case code. # Assign local variables for current path and entry: cdef object current_entry cdef object current_path_info cdef int path_handled cdef char minikind cdef int cmp_result # cdef char * temp_str # cdef Py_ssize_t temp_str_length # PyString_AsStringAndSize(disk_kind, &temp_str, &temp_str_length) # if not strncmp(temp_str, "directory", temp_str_length): if (self.current_block is not None and self.current_block_pos < PyList_GET_SIZE(self.current_block_list)): current_entry = PyList_GET_ITEM(self.current_block_list, self.current_block_pos) # accomodate pyrex Py_INCREF(current_entry) else: current_entry = None if (self.current_dir_info is not None and self.path_index < PyList_GET_SIZE(self.current_dir_list)): current_path_info = PyList_GET_ITEM(self.current_dir_list, self.path_index) # accomodate pyrex Py_INCREF(current_path_info) disk_kind = PyTuple_GET_ITEM(current_path_info, 2) # accomodate pyrex Py_INCREF(disk_kind) if disk_kind == "directory": current_path_info = self._maybe_tree_ref(current_path_info) else: current_path_info = None while (current_entry is not None or current_path_info is not None): advance_entry = -1 advance_path = -1 result = None changed = None path_handled = 0 if current_entry is None: # unversioned - the check for path_handled when the path # is advanced will yield this path if needed. pass elif current_path_info is None: # no path is fine: the per entry code will handle it. result, changed = self._process_entry(current_entry, current_path_info) else: minikind = _minikind_from_string( current_entry[1][self.target_index][0]) cmp_result = cmp(current_path_info[1], current_entry[0][1]) if (cmp_result or minikind == c'a' or minikind == c'r'): # The current path on disk doesn't match the dirblock # record. Either the dirblock record is marked as # absent/renamed, or the file on disk is not present at all # in the dirblock. Either way, report about the dirblock # entry, and let other code handle the filesystem one. # Compare the basename for these files to determine # which comes first if cmp_result < 0: # extra file on disk: pass for now, but only # increment the path, not the entry advance_entry = 0 else: # entry referring to file not present on disk. # advance the entry only, after processing. result, changed = self._process_entry(current_entry, None) advance_path = 0 else: # paths are the same,and the dirstate entry is not # absent or renamed. 
result, changed = self._process_entry(current_entry, current_path_info) if changed is not None: path_handled = -1 if not changed and not self.include_unchanged: changed = None # >- loop control starts here: # >- entry if advance_entry and current_entry is not None: self.current_block_pos = self.current_block_pos + 1 if self.current_block_pos < PyList_GET_SIZE(self.current_block_list): current_entry = self.current_block_list[self.current_block_pos] else: current_entry = None # >- path if advance_path and current_path_info is not None: if not path_handled: # unversioned in all regards if self.want_unversioned: new_executable = bool( stat.S_ISREG(current_path_info[3].st_mode) and stat.S_IEXEC & current_path_info[3].st_mode) try: relpath_unicode = self.utf8_decode(current_path_info[0])[0] except UnicodeDecodeError, _: raise errors.BadFilenameEncoding( current_path_info[0], osutils._fs_enc) if changed is not None: raise AssertionError( "result is not None: %r" % result) result = (None, (None, relpath_unicode), True, (False, False), (None, None), (None, self.utf8_decode(current_path_info[1])[0]), (None, current_path_info[2]), (None, new_executable)) changed = True # dont descend into this unversioned path if it is # a dir if current_path_info[2] in ('directory'): del self.current_dir_list[self.path_index] self.path_index = self.path_index - 1 # dont descend the disk iterator into any tree # paths. if current_path_info[2] == 'tree-reference': del self.current_dir_list[self.path_index] self.path_index = self.path_index - 1 self.path_index = self.path_index + 1 if self.path_index < len(self.current_dir_list): current_path_info = self.current_dir_list[self.path_index] if current_path_info[2] == 'directory': current_path_info = self._maybe_tree_ref( current_path_info) else: current_path_info = None if changed is not None: # Found a result on this pass, yield it if changed: self._gather_result_for_consistency(result) if changed or self.include_unchanged: return result if self.current_block is not None: self.block_index = self.block_index + 1 self._update_current_block() if self.current_dir_info is not None: self.path_index = 0 self.current_dir_list = None try: self.current_dir_info = self.dir_iterator.next() self.current_dir_list = self.current_dir_info[1] except StopIteration, _: self.current_dir_info = None cdef object _next_consistent_entries(self): """Grabs the next specific file parent case to consider. :return: A list of the results, each of which is as for _process_entry. """ results = [] while self.search_specific_file_parents: # Process the parent directories for the paths we were iterating. # Even in extremely large trees this should be modest, so currently # no attempt is made to optimise. path_utf8 = self.search_specific_file_parents.pop() if path_utf8 in self.searched_exact_paths: # We've examined this path. continue if osutils.is_inside_any(self.searched_specific_files, path_utf8): # We've examined this path. continue path_entries = self.state._entries_for_path(path_utf8) # We need either one or two entries. If the path in # self.target_index has moved (so the entry in source_index is in # 'ar') then we need to also look for the entry for this path in # self.source_index, to output the appropriate delete-or-rename. 
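# Sketch of the entry shape the "not in 'ar'" tests below rely on (simplified,
# hypothetical values): an entry is (key, tree_details), key is
# (dirname, basename, file-id), and tree_details[i][0] is the minikind for
# tree i -- 'a' absent, 'r' relocated, or a real kind such as 'f' for a file.
# For an 'r' entry, tree_details[i][1] holds the path it relocated to.
entry = (('dir', 'name', 'an-id'),
         [('f', 'sha1...', 0, False, 'packed-stat'),  # e.g. source tree
          ('r', 'other/path', 0, False, '')])         # e.g. target tree
present_in_target = entry[1][1][0] not in 'ar'   # False: it moved away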
selected_entries = [] found_item = False for candidate_entry in path_entries: # Find entries present in target at this path: if candidate_entry[1][self.target_index][0] not in 'ar': found_item = True selected_entries.append(candidate_entry) # Find entries present in source at this path: elif (self.source_index is not None and candidate_entry[1][self.source_index][0] not in 'ar'): found_item = True if candidate_entry[1][self.target_index][0] == 'a': # Deleted, emit it here. selected_entries.append(candidate_entry) else: # renamed, emit it when we process the directory it # ended up at. self.search_specific_file_parents.add( candidate_entry[1][self.target_index][1]) if not found_item: raise AssertionError( "Missing entry for specific path parent %r, %r" % ( path_utf8, path_entries)) path_info = self._path_info(path_utf8, path_utf8.decode('utf8')) for entry in selected_entries: if entry[0][2] in self.seen_ids: continue result, changed = self._process_entry(entry, path_info) if changed is None: raise AssertionError( "Got entry<->path mismatch for specific path " "%r entry %r path_info %r " % ( path_utf8, entry, path_info)) # Only include changes - we're outside the users requested # expansion. if changed: self._gather_result_for_consistency(result) if (result[6][0] == 'directory' and result[6][1] != 'directory'): # This stopped being a directory, the old children have # to be included. if entry[1][self.source_index][0] == 'r': # renamed, take the source path entry_path_utf8 = entry[1][self.source_index][1] else: entry_path_utf8 = path_utf8 initial_key = (entry_path_utf8, '', '') block_index, _ = self.state._find_block_index_from_key( initial_key) if block_index == 0: # The children of the root are in block index 1. block_index = block_index + 1 current_block = None if block_index < len(self.state._dirblocks): current_block = self.state._dirblocks[block_index] if not osutils.is_inside( entry_path_utf8, current_block[0]): # No entries for this directory at all. current_block = None if current_block is not None: for entry in current_block[1]: if entry[1][self.source_index][0] in 'ar': # Not in the source tree, so doesn't have to be # included. continue # Path of the entry itself. self.search_specific_file_parents.add( self.pathjoin(*entry[0][:2])) if changed or self.include_unchanged: results.append((result, changed)) self.searched_exact_paths.add(path_utf8) return results cdef object _path_info(self, utf8_path, unicode_path): """Generate path_info for unicode_path. :return: None if unicode_path does not exist, or a path_info tuple. """ abspath = self.tree.abspath(unicode_path) try: stat = os.lstat(abspath) except OSError, e: if e.errno == errno.ENOENT: # the path does not exist. return None else: raise utf8_basename = utf8_path.rsplit('/', 1)[-1] dir_info = (utf8_path, utf8_basename, osutils.file_kind_from_stat_mode(stat.st_mode), stat, abspath) if dir_info[2] == 'directory': if self.tree._directory_is_tree_reference( unicode_path): self.root_dir_info = self.root_dir_info[:2] + \ ('tree-reference',) + self.root_dir_info[3:] return dir_info bzr-2.7.0/bzrlib/_export_c_api.h0000644000000000000000000000667411264721473014737 0ustar 00000000000000/* Copyright (C) 2009 Canonical Ltd * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /* This file contains helper functions for exporting a C API for a CPython * extension module. */ #ifndef _EXPORT_C_API_H_ #define _EXPORT_C_API_H_ static const char *_C_API_NAME = "_C_API"; /** * Add a C function to the modules _C_API * This wraps the function in a PyCObject, and inserts that into a dict. * The key of the dict is the function name, and the description is the * signature of the function. * This is generally called during a modules init_MODULE function. * * @param module A Python module (the one being initialized) * @param funcname The name of the function being exported * @param func A pointer to the function * @param signature The C signature of the function * @return 0 if everything is successful, -1 if there is a problem. An * exception should also be set */ static int _export_function(PyObject *module, char *funcname, void *func, char *signature) { PyObject *d = NULL; PyObject *c_obj = NULL; /* (char *) is because python2.4 declares this api as 'char *' rather than * const char* which it really is. */ d = PyObject_GetAttrString(module, (char *)_C_API_NAME); if (!d) { PyErr_Clear(); d = PyDict_New(); if (!d) goto bad; Py_INCREF(d); if (PyModule_AddObject(module, (char *)_C_API_NAME, d) < 0) goto bad; } c_obj = PyCObject_FromVoidPtrAndDesc(func, signature, 0); if (!c_obj) goto bad; if (PyDict_SetItemString(d, funcname, c_obj) < 0) goto bad; Py_DECREF(d); return 0; bad: Py_XDECREF(c_obj); Py_XDECREF(d); return -1; } /* Note: * It feels like more could be done here. Specifically, if you look at * _static_tuple_c.h you can see some boilerplate where we have: * #ifdef STATIC_TUPLE_MODULE // are we exporting or importing * static RETVAL FUNCNAME PROTO; * #else * static RETVAL (*FUNCNAME) PROTO; * #endif * * And then in _static_tuple_c.c we have * int setup_c_api() * { * _export_function(module, #FUNCNAME, FUNCNAME, #PROTO); * } * * And then in _static_tuple_c.h import_##MODULE * struct function_definition functions[] = { * {#FUNCNAME, (void **)&FUNCNAME, #RETVAL #PROTO}, * ... * {NULL}}; * * And some similar stuff for types. However, this would mean that we would * need a way for the C preprocessor to build up a list of definitions to be * generated, and then expand that list at the appropriate time. * I would guess there would be a way to do this, but probably not without a * lot of magic, and the end result probably wouldn't be very pretty to * maintain. Perhaps python's dynamic nature has left me jaded about writing * boilerplate.... */ #endif // _EXPORT_C_API_H_ bzr-2.7.0/bzrlib/_groupcompress_py.py0000644000000000000000000004476311673635356016115 0ustar 00000000000000# Copyright (C) 2009 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Python version of compiled extensions for doing compression. We separate the implementation from the groupcompress.py to avoid importing useless stuff. """ from __future__ import absolute_import from bzrlib import osutils class _OutputHandler(object): """A simple class which just tracks how to split up an insert request.""" def __init__(self, out_lines, index_lines, min_len_to_index): self.out_lines = out_lines self.index_lines = index_lines self.min_len_to_index = min_len_to_index self.cur_insert_lines = [] self.cur_insert_len = 0 def add_copy(self, start_byte, end_byte): # The data stream allows >64kB in a copy, but to match the compiled # code, we will also limit it to a 64kB copy for start_byte in xrange(start_byte, end_byte, 64*1024): num_bytes = min(64*1024, end_byte - start_byte) copy_bytes = encode_copy_instruction(start_byte, num_bytes) self.out_lines.append(copy_bytes) self.index_lines.append(False) def _flush_insert(self): if not self.cur_insert_lines: return if self.cur_insert_len > 127: raise AssertionError('We cannot insert more than 127 bytes' ' at a time.') self.out_lines.append(chr(self.cur_insert_len)) self.index_lines.append(False) self.out_lines.extend(self.cur_insert_lines) if self.cur_insert_len < self.min_len_to_index: self.index_lines.extend([False]*len(self.cur_insert_lines)) else: self.index_lines.extend([True]*len(self.cur_insert_lines)) self.cur_insert_lines = [] self.cur_insert_len = 0 def _insert_long_line(self, line): # Flush out anything pending self._flush_insert() line_len = len(line) for start_index in xrange(0, line_len, 127): next_len = min(127, line_len - start_index) self.out_lines.append(chr(next_len)) self.index_lines.append(False) self.out_lines.append(line[start_index:start_index+next_len]) # We don't index long lines, because we won't be able to match # a line split across multiple inserts anway self.index_lines.append(False) def add_insert(self, lines): if self.cur_insert_lines != []: raise AssertionError('self.cur_insert_lines must be empty when' ' adding a new insert') for line in lines: if len(line) > 127: self._insert_long_line(line) else: next_len = len(line) + self.cur_insert_len if next_len > 127: # Adding this line would overflow, so flush, and start over self._flush_insert() self.cur_insert_lines = [line] self.cur_insert_len = len(line) else: self.cur_insert_lines.append(line) self.cur_insert_len = next_len self._flush_insert() class LinesDeltaIndex(object): """This class indexes matches between strings. :ivar lines: The 'static' lines that will be preserved between runs. 
:ivar _matching_lines: A dict of {line:[matching offsets]} :ivar line_offsets: The byte offset for the end of each line, used to quickly map between a matching line number and the byte location :ivar endpoint: The total number of bytes in self.line_offsets """ _MIN_MATCH_BYTES = 10 _SOFT_MIN_MATCH_BYTES = 200 def __init__(self, lines): self.lines = [] self.line_offsets = [] self.endpoint = 0 self._matching_lines = {} self.extend_lines(lines, [True]*len(lines)) def _update_matching_lines(self, new_lines, index): matches = self._matching_lines start_idx = len(self.lines) if len(new_lines) != len(index): raise AssertionError('The number of lines to be indexed does' ' not match the index/don\'t index flags: %d != %d' % (len(new_lines), len(index))) for idx, do_index in enumerate(index): if not do_index: continue line = new_lines[idx] try: matches[line].add(start_idx + idx) except KeyError: matches[line] = set([start_idx + idx]) def get_matches(self, line): """Return the lines which match the line in right.""" try: return self._matching_lines[line] except KeyError: return None def _get_longest_match(self, lines, pos): """Look at all matches for the current line, return the longest. :param lines: The lines we are matching against :param pos: The current location we care about :param locations: A list of lines that matched the current location. This may be None, but often we'll have already found matches for this line. :return: (start_in_self, start_in_lines, num_lines) All values are the offset in the list (aka the line number) If start_in_self is None, then we have no matches, and this line should be inserted in the target. """ range_start = pos range_len = 0 prev_locations = None max_pos = len(lines) matching = self._matching_lines while pos < max_pos: try: locations = matching[lines[pos]] except KeyError: # No more matches, just return whatever we have, but we know # that this last position is not going to match anything pos += 1 break # We have a match if prev_locations is None: # This is the first match in a range prev_locations = locations range_len = 1 locations = None # Consumed else: # We have a match started, compare to see if any of the # current matches can be continued next_locations = locations.intersection([loc + 1 for loc in prev_locations]) if next_locations: # At least one of the regions continues to match prev_locations = set(next_locations) range_len += 1 locations = None # Consumed else: # All current regions no longer match. # This line does still match something, just not at the # end of the previous matches. We will return locations # so that we can avoid another _matching_lines lookup. break pos += 1 if prev_locations is None: # We have no matches, this is a pure insert return None, pos smallest = min(prev_locations) return (smallest - range_len + 1, range_start, range_len), pos def get_matching_blocks(self, lines, soft=False): """Return the ranges in lines which match self.lines. :param lines: lines to compress :return: A list of (old_start, new_start, length) tuples which reflect a region in self.lines that is present in lines. The last element of the list is always (old_len, new_len, 0) to provide a end point for generating instructions from the matching blocks list. """ # In this code, we iterate over multiple _get_longest_match calls, to # find the next longest copy, and possible insert regions. We then # convert that to the simple matching_blocks representation, since # otherwise inserting 10 lines in a row would show up as 10 # instructions. 
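# A small usage sketch with hypothetical data, showing the matching-blocks
# representation described above (the trailing (old_len, new_len, 0) tuple is
# the end marker):
old_lines = ['common line 1\n', 'common line 2\n', 'only in old\n']
new_lines = ['common line 1\n', 'common line 2\n', 'only in new\n']
index = LinesDeltaIndex(old_lines)
blocks = index.get_matching_blocks(new_lines)
# blocks should be [(0, 0, 2), (3, 3, 0)]: the first two lines match, the
# third does not, and the final tuple closes the list.
#
# The module-level make_delta()/apply_delta() defined later in this file wrap
# this machinery and should round-trip:
delta = make_delta(''.join(old_lines), ''.join(new_lines))
assert apply_delta(''.join(old_lines), delta) == ''.join(new_lines)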
result = [] pos = 0 max_pos = len(lines) result_append = result.append min_match_bytes = self._MIN_MATCH_BYTES if soft: min_match_bytes = self._SOFT_MIN_MATCH_BYTES while pos < max_pos: block, pos = self._get_longest_match(lines, pos) if block is not None: # Check to see if we match fewer than min_match_bytes. As we # will turn this into a pure 'insert', rather than a copy. # block[-1] is the number of lines. A quick check says if we # have more lines than min_match_bytes, then we know we have # enough bytes. if block[-1] < min_match_bytes: # This block may be a 'short' block, check old_start, new_start, range_len = block matched_bytes = sum(map(len, lines[new_start:new_start + range_len])) if matched_bytes < min_match_bytes: block = None if block is not None: result_append(block) result_append((len(self.lines), len(lines), 0)) return result def extend_lines(self, lines, index): """Add more lines to the left-lines list. :param lines: A list of lines to add :param index: A True/False for each node to define if it should be indexed. """ self._update_matching_lines(lines, index) self.lines.extend(lines) endpoint = self.endpoint for line in lines: endpoint += len(line) self.line_offsets.append(endpoint) if len(self.line_offsets) != len(self.lines): raise AssertionError('Somehow the line offset indicator' ' got out of sync with the line counter.') self.endpoint = endpoint def _flush_insert(self, start_linenum, end_linenum, new_lines, out_lines, index_lines): """Add an 'insert' request to the data stream.""" bytes_to_insert = ''.join(new_lines[start_linenum:end_linenum]) insert_length = len(bytes_to_insert) # Each insert instruction is at most 127 bytes long for start_byte in xrange(0, insert_length, 127): insert_count = min(insert_length - start_byte, 127) out_lines.append(chr(insert_count)) # Don't index the 'insert' instruction index_lines.append(False) insert = bytes_to_insert[start_byte:start_byte+insert_count] as_lines = osutils.split_lines(insert) out_lines.extend(as_lines) index_lines.extend([True]*len(as_lines)) def _flush_copy(self, old_start_linenum, num_lines, out_lines, index_lines): if old_start_linenum == 0: first_byte = 0 else: first_byte = self.line_offsets[old_start_linenum - 1] stop_byte = self.line_offsets[old_start_linenum + num_lines - 1] num_bytes = stop_byte - first_byte # The data stream allows >64kB in a copy, but to match the compiled # code, we will also limit it to a 64kB copy for start_byte in xrange(first_byte, stop_byte, 64*1024): num_bytes = min(64*1024, stop_byte - start_byte) copy_bytes = encode_copy_instruction(start_byte, num_bytes) out_lines.append(copy_bytes) index_lines.append(False) def make_delta(self, new_lines, bytes_length=None, soft=False): """Compute the delta for this content versus the original content.""" if bytes_length is None: bytes_length = sum(map(len, new_lines)) # reserved for content type, content length out_lines = ['', '', encode_base128_int(bytes_length)] index_lines = [False, False, False] output_handler = _OutputHandler(out_lines, index_lines, self._MIN_MATCH_BYTES) blocks = self.get_matching_blocks(new_lines, soft=soft) current_line_num = 0 # We either copy a range (while there are reusable lines) or we # insert new lines. 
        # To find reusable lines we traverse the matching blocks computed
        # above.
        for old_start, new_start, range_len in blocks:
            if new_start != current_line_num:
                # non-matching region, insert the content
                output_handler.add_insert(new_lines[current_line_num:new_start])
            current_line_num = new_start + range_len
            if range_len:
                # Convert the line based offsets into byte based offsets
                if old_start == 0:
                    first_byte = 0
                else:
                    first_byte = self.line_offsets[old_start - 1]
                last_byte = self.line_offsets[old_start + range_len - 1]
                output_handler.add_copy(first_byte, last_byte)
        return out_lines, index_lines


def encode_base128_int(val):
    """Convert an integer into a 7-bit lsb encoding."""
    # e.g. 300 (0x12c) encodes as '\xac\x02'.
    bytes = []
    count = 0
    while val >= 0x80:
        bytes.append(chr((val | 0x80) & 0xFF))
        val >>= 7
    bytes.append(chr(val))
    return ''.join(bytes)


def decode_base128_int(bytes):
    """Decode an integer from a 7-bit lsb encoding."""
    # e.g. decode_base128_int('\xac\x02') == (300, 2).
    offset = 0
    val = 0
    shift = 0
    bval = ord(bytes[offset])
    while bval >= 0x80:
        val |= (bval & 0x7F) << shift
        shift += 7
        offset += 1
        bval = ord(bytes[offset])
    val |= bval << shift
    offset += 1
    return val, offset


def encode_copy_instruction(offset, length):
    """Convert this offset into a control code and bytes."""
    # e.g. encode_copy_instruction(10, 30) == '\x91\x0a\x1e'.
    copy_command = 0x80
    copy_bytes = [None]
    for copy_bit in (0x01, 0x02, 0x04, 0x08):
        base_byte = offset & 0xff
        if base_byte:
            copy_command |= copy_bit
            copy_bytes.append(chr(base_byte))
        offset >>= 8
    if length is None:
        raise ValueError("cannot supply a length of None")
    if length > 0x10000:
        raise ValueError("we don't emit copy records for lengths > 64KiB")
    if length == 0:
        raise ValueError("We cannot emit a copy of length 0")
    if length != 0x10000:
        # A copy of length exactly 64*1024 == 0x10000 is sent as a length of 0,
        # since that saves bytes for large chained copies
        for copy_bit in (0x10, 0x20):
            base_byte = length & 0xff
            if base_byte:
                copy_command |= copy_bit
                copy_bytes.append(chr(base_byte))
            length >>= 8
    copy_bytes[0] = chr(copy_command)
    return ''.join(copy_bytes)


def decode_copy_instruction(bytes, cmd, pos):
    """Decode a copy instruction from the next few bytes.

    A copy instruction is a variable number of bytes, so we will parse the
    bytes we care about, and return the new position, as well as the offset
    and length referred to in the bytes.
:param bytes: A string of bytes :param cmd: The command code :param pos: The position in bytes right after the copy command :return: (offset, length, newpos) The offset of the copy start, the number of bytes to copy, and the position after the last byte of the copy """ if cmd & 0x80 != 0x80: raise ValueError('copy instructions must have bit 0x80 set') offset = 0 length = 0 if (cmd & 0x01): offset = ord(bytes[pos]) pos += 1 if (cmd & 0x02): offset = offset | (ord(bytes[pos]) << 8) pos += 1 if (cmd & 0x04): offset = offset | (ord(bytes[pos]) << 16) pos += 1 if (cmd & 0x08): offset = offset | (ord(bytes[pos]) << 24) pos += 1 if (cmd & 0x10): length = ord(bytes[pos]) pos += 1 if (cmd & 0x20): length = length | (ord(bytes[pos]) << 8) pos += 1 if (cmd & 0x40): length = length | (ord(bytes[pos]) << 16) pos += 1 if length == 0: length = 65536 return (offset, length, pos) def make_delta(source_bytes, target_bytes): """Create a delta from source to target.""" if type(source_bytes) is not str: raise TypeError('source is not a str') if type(target_bytes) is not str: raise TypeError('target is not a str') line_locations = LinesDeltaIndex(osutils.split_lines(source_bytes)) delta, _ = line_locations.make_delta(osutils.split_lines(target_bytes), bytes_length=len(target_bytes)) return ''.join(delta) def apply_delta(basis, delta): """Apply delta to this object to become new_version_id.""" if type(basis) is not str: raise TypeError('basis is not a str') if type(delta) is not str: raise TypeError('delta is not a str') target_length, pos = decode_base128_int(delta) lines = [] len_delta = len(delta) while pos < len_delta: cmd = ord(delta[pos]) pos += 1 if cmd & 0x80: offset, length, pos = decode_copy_instruction(delta, cmd, pos) last = offset + length if last > len(basis): raise ValueError('data would copy bytes past the' 'end of source') lines.append(basis[offset:last]) else: # Insert of 'cmd' bytes if cmd == 0: raise ValueError('Command == 0 not supported yet') lines.append(delta[pos:pos+cmd]) pos += cmd bytes = ''.join(lines) if len(bytes) != target_length: raise ValueError('Delta claimed to be %d long, but ended up' ' %d long' % (target_length, len(bytes))) return bytes def apply_delta_to_source(source, delta_start, delta_end): """Extract a delta from source bytes, and apply it.""" source_size = len(source) if delta_start >= source_size: raise ValueError('delta starts after source') if delta_end > source_size: raise ValueError('delta ends after source') if delta_start >= delta_end: raise ValueError('delta starts after it ends') delta_bytes = source[delta_start:delta_end] return apply_delta(source, delta_bytes) bzr-2.7.0/bzrlib/_groupcompress_pyx.pyx0000644000000000000000000005416311564232056016455 0ustar 00000000000000# Copyright (C) 2008, 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Compiled extensions for doing compression.""" #python2.4 support cdef extern from "python-compat.h": pass cdef extern from "Python.h": ctypedef struct PyObject: pass ctypedef int Py_ssize_t # Required for older pyrex versions int PyString_CheckExact(object) char * PyString_AS_STRING(object) Py_ssize_t PyString_GET_SIZE(object) object PyString_FromStringAndSize(char *, Py_ssize_t) cdef extern from *: ctypedef unsigned long size_t void * malloc(size_t) nogil void * realloc(void *, size_t) nogil void free(void *) nogil void memcpy(void *, void *, size_t) nogil cdef extern from "delta.h": struct source_info: void *buf unsigned long size unsigned long agg_offset struct delta_index: pass ctypedef enum delta_result: DELTA_OK DELTA_OUT_OF_MEMORY DELTA_INDEX_NEEDED DELTA_SOURCE_EMPTY DELTA_SOURCE_BAD DELTA_BUFFER_EMPTY DELTA_SIZE_TOO_BIG delta_result create_delta_index(source_info *src, delta_index *old, delta_index **fresh, int max_entries) nogil delta_result create_delta_index_from_delta(source_info *delta, delta_index *old, delta_index **fresh) nogil void free_delta_index(delta_index *index) nogil delta_result create_delta(delta_index *indexes, void *buf, unsigned long bufsize, unsigned long *delta_size, unsigned long max_delta_size, void **delta_data) nogil unsigned long get_delta_hdr_size(unsigned char **datap, unsigned char *top) nogil unsigned long sizeof_delta_index(delta_index *index) Py_ssize_t DELTA_SIZE_MIN int get_hash_offset(delta_index *index, int pos, unsigned int *hash_offset) int get_entry_summary(delta_index *index, int pos, unsigned int *global_offset, unsigned int *hash_val) unsigned int rabin_hash (unsigned char *data) cdef void *safe_malloc(size_t count) except NULL: cdef void *result result = malloc(count) if result == NULL: raise MemoryError('Failed to allocate %d bytes of memory' % (count,)) return result cdef void *safe_realloc(void * old, size_t count) except NULL: cdef void *result result = realloc(old, count) if result == NULL: raise MemoryError('Failed to reallocate to %d bytes of memory' % (count,)) return result cdef int safe_free(void **val) except -1: assert val != NULL if val[0] != NULL: free(val[0]) val[0] = NULL def make_delta_index(source): return DeltaIndex(source) cdef object _translate_delta_failure(delta_result result): if result == DELTA_OUT_OF_MEMORY: return MemoryError("Delta function failed to allocate memory") elif result == DELTA_INDEX_NEEDED: return ValueError("Delta function requires delta_index param") elif result == DELTA_SOURCE_EMPTY: return ValueError("Delta function given empty source_info param") elif result == DELTA_SOURCE_BAD: return RuntimeError("Delta function given invalid source_info param") elif result == DELTA_BUFFER_EMPTY: return ValueError("Delta function given empty buffer params") return AssertionError("Unrecognised delta result code: %d" % result) def _rabin_hash(content): if not PyString_CheckExact(content): raise ValueError('content must be a string') if len(content) < 16: raise ValueError('content must be at least 16 bytes long') # Try to cast it to an int, if it can fit return int(rabin_hash((PyString_AS_STRING(content)))) cdef class DeltaIndex: # We need Pyrex 0.9.8+ to understand a 'list' definition, and this object # isn't performance critical # cdef readonly list _sources cdef readonly object _sources cdef 
source_info *_source_infos cdef delta_index *_index cdef public unsigned long _source_offset cdef readonly unsigned int _max_num_sources cdef public int _max_bytes_to_index def __init__(self, source=None, max_bytes_to_index=None): self._sources = [] self._index = NULL self._max_num_sources = 65000 self._source_infos = safe_malloc(sizeof(source_info) * self._max_num_sources) self._source_offset = 0 self._max_bytes_to_index = 0 if max_bytes_to_index is not None: self._max_bytes_to_index = max_bytes_to_index if source is not None: self.add_source(source, 0) def __sizeof__(self): # We want to track the _source_infos allocations, but the referenced # void* are actually tracked in _sources itself. # XXX: Cython is capable of doing sizeof(class) and returning the size # of the underlying struct. Pyrex (<= 0.9.9) refuses, so we need # to do it manually. *sigh* Note that we might get it wrong # because of alignment issues. cdef Py_ssize_t size # PyObject start, vtable *, 3 object pointers, 2 C ints size = ((sizeof(PyObject) + sizeof(void*) + 3*sizeof(PyObject*) + sizeof(unsigned long) + sizeof(unsigned int)) + (sizeof(source_info) * self._max_num_sources) + sizeof_delta_index(self._index)) return size def __repr__(self): return '%s(%d, %d)' % (self.__class__.__name__, len(self._sources), self._source_offset) def __dealloc__(self): if self._index != NULL: free_delta_index(self._index) self._index = NULL safe_free(&self._source_infos) def _has_index(self): return (self._index != NULL) def _dump_index(self): """Dump the pointers in the index. This is an arbitrary layout, used for testing. It is not meant to be used in production code. :return: (hash_list, entry_list) hash_list A list of offsets, so hash[i] points to the 'hash bucket' starting at the given offset and going until hash[i+1] entry_list A list of (text_offset, hash_val). text_offset is the offset in the "source" texts, and hash_val is the RABIN hash for that offset. Note that the entry should be in the hash bucket defined by hash[(hash_val & mask)] && hash[(hash_val & mask) + 1] """ cdef int pos cdef unsigned int text_offset cdef unsigned int hash_val cdef unsigned int hash_offset if self._index == NULL: return None hash_list = [] pos = 0 while get_hash_offset(self._index, pos, &hash_offset): hash_list.append(int(hash_offset)) pos += 1 entry_list = [] pos = 0 while get_entry_summary(self._index, pos, &text_offset, &hash_val): # Map back using 'int' so that we don't get Long everywhere, when # almost everything is <2**31. val = tuple(map(int, [text_offset, hash_val])) entry_list.append(val) pos += 1 return hash_list, entry_list def add_delta_source(self, delta, unadded_bytes): """Add a new delta to the source texts. :param delta: The text of the delta, this must be a byte string. :param unadded_bytes: Number of bytes that were added to the source that were not indexed. 
""" cdef char *c_delta cdef Py_ssize_t c_delta_size cdef delta_index *index cdef delta_result res cdef unsigned int source_location cdef source_info *src cdef unsigned int num_indexes if not PyString_CheckExact(delta): raise TypeError('delta is not a str') source_location = len(self._sources) if source_location >= self._max_num_sources: self._expand_sources() self._sources.append(delta) c_delta = PyString_AS_STRING(delta) c_delta_size = PyString_GET_SIZE(delta) src = self._source_infos + source_location src.buf = c_delta src.size = c_delta_size src.agg_offset = self._source_offset + unadded_bytes with nogil: res = create_delta_index_from_delta(src, self._index, &index) if res != DELTA_OK: raise _translate_delta_failure(res) self._source_offset = src.agg_offset + src.size if index != self._index: free_delta_index(self._index) self._index = index def add_source(self, source, unadded_bytes): """Add a new bit of source text to the delta indexes. :param source: The text in question, this must be a byte string :param unadded_bytes: Assume there are this many bytes that didn't get added between this source and the end of the previous source. :param max_pointers: Add no more than this many entries to the index. By default, we sample every 16 bytes, if that would require more than max_entries, we will reduce the sampling rate. A value of 0 means unlimited, None means use the default limit. """ cdef char *c_source cdef Py_ssize_t c_source_size cdef delta_index *index cdef delta_result res cdef unsigned int source_location cdef source_info *src cdef unsigned int num_indexes cdef int max_num_entries if not PyString_CheckExact(source): raise TypeError('source is not a str') source_location = len(self._sources) if source_location >= self._max_num_sources: self._expand_sources() if source_location != 0 and self._index == NULL: # We were lazy about populating the index, create it now self._populate_first_index() self._sources.append(source) c_source = PyString_AS_STRING(source) c_source_size = PyString_GET_SIZE(source) src = self._source_infos + source_location src.buf = c_source src.size = c_source_size src.agg_offset = self._source_offset + unadded_bytes self._source_offset = src.agg_offset + src.size # We delay creating the index on the first insert if source_location != 0: with nogil: res = create_delta_index(src, self._index, &index, self._max_bytes_to_index) if res != DELTA_OK: raise _translate_delta_failure(res) if index != self._index: free_delta_index(self._index) self._index = index cdef _populate_first_index(self): cdef delta_index *index cdef delta_result res if len(self._sources) != 1 or self._index != NULL: raise AssertionError('_populate_first_index should only be' ' called when we have a single source and no index yet') # We know that self._index is already NULL, so create_delta_index # will always create a new index unless there's a malloc failure with nogil: res = create_delta_index(&self._source_infos[0], NULL, &index, self._max_bytes_to_index) if res != DELTA_OK: raise _translate_delta_failure(res) self._index = index cdef _expand_sources(self): raise RuntimeError('if we move self._source_infos, then we need to' ' change all of the index pointers as well.') self._max_num_sources = self._max_num_sources * 2 self._source_infos = safe_realloc(self._source_infos, sizeof(source_info) * self._max_num_sources) def make_delta(self, target_bytes, max_delta_size=0): """Create a delta from the current source to the target bytes.""" cdef char *target cdef Py_ssize_t target_size cdef void * delta 
cdef unsigned long delta_size cdef unsigned long c_max_delta_size cdef delta_result res if self._index == NULL: if len(self._sources) == 0: return None # We were just lazy about generating the index self._populate_first_index() if not PyString_CheckExact(target_bytes): raise TypeError('target is not a str') target = PyString_AS_STRING(target_bytes) target_size = PyString_GET_SIZE(target_bytes) # TODO: inline some of create_delta so we at least don't have to double # malloc, and can instead use PyString_FromStringAndSize, to # allocate the bytes into the final string c_max_delta_size = max_delta_size with nogil: res = create_delta(self._index, target, target_size, &delta_size, c_max_delta_size, &delta) result = None if res == DELTA_OK: result = PyString_FromStringAndSize(delta, delta_size) free(delta) elif res != DELTA_SIZE_TOO_BIG: raise _translate_delta_failure(res) return result def make_delta(source_bytes, target_bytes): """Create a delta, this is a wrapper around DeltaIndex.make_delta.""" di = DeltaIndex(source_bytes) return di.make_delta(target_bytes) def apply_delta(source_bytes, delta_bytes): """Apply a delta generated by make_delta to source_bytes.""" cdef char *source cdef Py_ssize_t source_size cdef char *delta cdef Py_ssize_t delta_size if not PyString_CheckExact(source_bytes): raise TypeError('source is not a str') if not PyString_CheckExact(delta_bytes): raise TypeError('delta is not a str') source = PyString_AS_STRING(source_bytes) source_size = PyString_GET_SIZE(source_bytes) delta = PyString_AS_STRING(delta_bytes) delta_size = PyString_GET_SIZE(delta_bytes) # Code taken from patch-delta.c, only brought here to give better error # handling, and to avoid double allocating memory if (delta_size < DELTA_SIZE_MIN): # XXX: Invalid delta block raise RuntimeError('delta_size %d smaller than min delta size %d' % (delta_size, DELTA_SIZE_MIN)) return _apply_delta(source, source_size, delta, delta_size) cdef unsigned char *_decode_copy_instruction(unsigned char *bytes, unsigned char cmd, unsigned int *offset, unsigned int *length) nogil: # cannot_raise """Decode a copy instruction from the next few bytes. A copy instruction is a variable number of bytes, so we will parse the bytes we care about, and return the new position, as well as the offset and length referred to in the bytes. 
:param bytes: Pointer to the start of bytes after cmd :param cmd: The command code :return: Pointer to the bytes just after the last decode byte """ cdef unsigned int off, size, count off = 0 size = 0 count = 0 if (cmd & 0x01): off = bytes[count] count = count + 1 if (cmd & 0x02): off = off | (bytes[count] << 8) count = count + 1 if (cmd & 0x04): off = off | (bytes[count] << 16) count = count + 1 if (cmd & 0x08): off = off | (bytes[count] << 24) count = count + 1 if (cmd & 0x10): size = bytes[count] count = count + 1 if (cmd & 0x20): size = size | (bytes[count] << 8) count = count + 1 if (cmd & 0x40): size = size | (bytes[count] << 16) count = count + 1 if (size == 0): size = 0x10000 offset[0] = off length[0] = size return bytes + count cdef object _apply_delta(char *source, Py_ssize_t source_size, char *delta, Py_ssize_t delta_size): """common functionality between apply_delta and apply_delta_to_source.""" cdef unsigned char *data, *top cdef unsigned char *dst_buf, *out, cmd cdef Py_ssize_t size cdef unsigned int cp_off, cp_size cdef int failed data = delta top = data + delta_size # now the result size size = get_delta_hdr_size(&data, top) result = PyString_FromStringAndSize(NULL, size) dst_buf = PyString_AS_STRING(result) failed = 0 with nogil: out = dst_buf while (data < top): cmd = data[0] data = data + 1 if (cmd & 0x80): # Copy instruction data = _decode_copy_instruction(data, cmd, &cp_off, &cp_size) if (cp_off + cp_size < cp_size or cp_off + cp_size > source_size or cp_size > size): failed = 1 break memcpy(out, source + cp_off, cp_size) out = out + cp_size size = size - cp_size else: # Insert instruction if cmd == 0: # cmd == 0 is reserved for future encoding # extensions. In the mean time we must fail when # encountering them (might be data corruption). 
failed = 2 break if cmd > size: failed = 3 break memcpy(out, data, cmd) out = out + cmd data = data + cmd size = size - cmd if failed: if failed == 1: raise ValueError('Something wrong with:' ' cp_off = %s, cp_size = %s' ' source_size = %s, size = %s' % (cp_off, cp_size, source_size, size)) elif failed == 2: raise ValueError('Got delta opcode: 0, not supported') elif failed == 3: raise ValueError('Insert instruction longer than remaining' ' bytes: %d > %d' % (cmd, size)) # sanity check if (data != top or size != 0): raise RuntimeError('Did not extract the number of bytes we expected' ' we were left with %d bytes in "size", and top - data = %d' % (size, (top - data))) return None # *dst_size = out - dst_buf; if (out - dst_buf) != PyString_GET_SIZE(result): raise RuntimeError('Number of bytes extracted did not match the' ' size encoded in the delta header.') return result def apply_delta_to_source(source, delta_start, delta_end): """Extract a delta from source bytes, and apply it.""" cdef char *c_source cdef Py_ssize_t c_source_size cdef char *c_delta cdef Py_ssize_t c_delta_size cdef Py_ssize_t c_delta_start, c_delta_end if not PyString_CheckExact(source): raise TypeError('source is not a str') c_source_size = PyString_GET_SIZE(source) c_delta_start = delta_start c_delta_end = delta_end if c_delta_start >= c_source_size: raise ValueError('delta starts after source') if c_delta_end > c_source_size: raise ValueError('delta ends after source') if c_delta_start >= c_delta_end: raise ValueError('delta starts after it ends') c_delta_size = c_delta_end - c_delta_start c_source = PyString_AS_STRING(source) c_delta = c_source + c_delta_start # We don't use source_size, because we know the delta should not refer to # any bytes after it starts return _apply_delta(c_source, c_delta_start, c_delta, c_delta_size) def encode_base128_int(val): """Convert an integer into a 7-bit lsb encoding.""" cdef unsigned int c_val cdef Py_ssize_t count cdef unsigned int num_bytes cdef unsigned char c_bytes[8] # max size for 32-bit int is 5 bytes c_val = val count = 0 while c_val >= 0x80 and count < 8: c_bytes[count] = ((c_val | 0x80) & 0xFF) c_val = c_val >> 7 count = count + 1 if count >= 8 or c_val >= 0x80: raise ValueError('encode_base128_int overflowed the buffer') c_bytes[count] = (c_val & 0xFF) count = count + 1 return PyString_FromStringAndSize(c_bytes, count) def decode_base128_int(bytes): """Decode an integer from a 7-bit lsb encoding.""" cdef int offset cdef int val cdef unsigned int uval cdef int shift cdef Py_ssize_t num_low_bytes cdef unsigned char *c_bytes offset = 0 val = 0 shift = 0 if not PyString_CheckExact(bytes): raise TypeError('bytes is not a string') c_bytes = PyString_AS_STRING(bytes) # We take off 1, because we have to be able to decode the non-expanded byte num_low_bytes = PyString_GET_SIZE(bytes) - 1 while (c_bytes[offset] & 0x80) and offset < num_low_bytes: val = val | ((c_bytes[offset] & 0x7F) << shift) shift = shift + 7 offset = offset + 1 if c_bytes[offset] & 0x80: raise ValueError('Data not properly formatted, we ran out of' ' bytes before 0x80 stopped being set.') val = val | (c_bytes[offset] << shift) offset = offset + 1 if val < 0: uval = val return uval, offset return val, offset bzr-2.7.0/bzrlib/_import_c_api.h0000644000000000000000000001372211264721473014720 0ustar 00000000000000/* Copyright (C) 2009 Canonical Ltd * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software 
Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _IMPORT_C_API_H_ #define _IMPORT_C_API_H_ /** * Helper functions to eliminate some of the boilerplate when importing a C API * from a CPython extension module. * * For more information see _export_c_api.h */ static const char *_C_API_NAME = "_C_API"; /** * Import a function from the _C_API_NAME dict that is part of module. * * @param module The Python module we are importing from * the attribute _C_API_NAME will be used as a dictionary * containing the function pointer we are looking for. * @param funcname Name of the function we want to import * @param func A pointer to the function handle where we will store the * function. * @param signature The C signature of the function. This is validated * against the signature stored in the C api, to make sure * there is no versioning skew. */ static int _import_function(PyObject *module, const char *funcname, void **func, const char *signature) { PyObject *d = NULL; PyObject *c_obj = NULL; const char *desc = NULL; /* (char *) because Python2.4 defines this as (char *) rather than * (const char *) */ d = PyObject_GetAttrString(module, (char *)_C_API_NAME); if (!d) { // PyObject_GetAttrString sets an appropriate exception goto bad; } c_obj = PyDict_GetItemString(d, funcname); if (!c_obj) { // PyDict_GetItemString does not set an exception PyErr_Format(PyExc_AttributeError, "Module %s did not export a function named %s\n", PyModule_GetName(module), funcname); goto bad; } desc = (char *)PyCObject_GetDesc(c_obj); if (!desc || strcmp(desc, signature) != 0) { if (desc == NULL) { desc = ""; } PyErr_Format(PyExc_TypeError, "C function %s.%s has wrong signature (expected %s, got %s)", PyModule_GetName(module), funcname, signature, desc); goto bad; } *func = PyCObject_AsVoidPtr(c_obj); Py_DECREF(d); return 0; bad: Py_XDECREF(d); return -1; } /** * Get a pointer to an exported PyTypeObject. * * @param module The Python module we are importing from * @param class_name Attribute of the module that should reference the * Type object. Note that a PyTypeObject is the python * description of the type, not the raw C structure. * @return A Pointer to the requested type object. On error NULL will be * returned and an exception will be set. */ static PyTypeObject * _import_type(PyObject *module, const char *class_name) { PyObject *type = NULL; type = PyObject_GetAttrString(module, (char *)class_name); if (!type) { goto bad; } if (!PyType_Check(type)) { PyErr_Format(PyExc_TypeError, "%s.%s is not a type object", PyModule_GetName(module), class_name); goto bad; } return (PyTypeObject *)type; bad: Py_XDECREF(type); return NULL; } struct function_description { const char *name; void **pointer; const char *signature; }; struct type_description { const char *name; PyTypeObject **pointer; }; /** * Helper for importing several functions and types in a data-driven manner. * * @param module The name of the module we will be importing * @param functions A list of function_description objects, describing the * functions being imported. 
* The list should be terminated with {NULL} to indicate * there are no more functions to import. * @param types A list of type_description objects describing type * objects that we want to import. The list should be * terminated with {NULL} to indicate there are no more * types to import. * @return 0 on success, -1 on error and an exception should be set. */ static int _import_extension_module(const char *module_name, struct function_description *functions, struct type_description *types) { PyObject *module = NULL; struct function_description *cur_func; struct type_description *cur_type; int ret_code; module = PyImport_ImportModule((char *)module_name); if (!module) goto bad; if (functions != NULL) { cur_func = functions; while (cur_func->name != NULL) { ret_code = _import_function(module, cur_func->name, cur_func->pointer, cur_func->signature); if (ret_code < 0) goto bad; cur_func++; } } if (types != NULL) { PyTypeObject *type_p = NULL; cur_type = types; while (cur_type->name != NULL) { type_p = _import_type(module, cur_type->name); if (type_p == NULL) goto bad; *(cur_type->pointer) = type_p; cur_type++; } } Py_XDECREF(module); return 0; bad: Py_XDECREF(module); return -1; } #endif // _IMPORT_C_API_H_ bzr-2.7.0/bzrlib/_knit_load_data_py.py0000644000000000000000000000733611673403246016124 0ustar 00000000000000# Copyright (C) 2007 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import from bzrlib import errors def _load_data_py(kndx, fp): """Read in a knit index.""" cache = kndx._cache history = kndx._history kndx.check_header(fp) # readlines reads the whole file at once: # bad for transports like http, good for local disk # we save 60 ms doing this one change ( # from calling readline each time to calling # readlines once. # probably what we want for nice behaviour on # http is a incremental readlines that yields, or # a check for local vs non local indexes, history_top = len(history) - 1 for line in fp.readlines(): rec = line.split() if len(rec) < 5 or rec[-1] != ':': # corrupt line. # FIXME: in the future we should determine if its a # short write - and ignore it # or a different failure, and raise. RBC 20060407 continue try: parents = [] for value in rec[4:-1]: if value[0] == '.': # uncompressed reference parent_id = value[1:] else: parent_id = history[int(value)] parents.append(parent_id) except (IndexError, ValueError), e: # The parent could not be decoded to get its parent row. This # at a minimum will cause this row to have wrong parents, or # even to apply a delta to the wrong base and decode # incorrectly. its therefore not usable, and because we have # encountered a situation where a new knit index had this # corrupt we can't asssume that no other rows referring to the # index of this record actually mean the subsequent uncorrupt # one, so we error. 
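# Illustrative example of the index line format parsed above, using a
# hypothetical record.  The layout is
#   "<version-id> <options> <pos> <size> <parent refs ...> :"
# where a parent reference starting with '.' is an explicit revision id and a
# bare integer refers to an earlier row of the _history list.
sample_line = 'rev-2 line-delta,no-eol 417 82 0 .rev-external :\n'
rec = sample_line.split()
# rec[:4] -> version id, options, pos, size; rec[4:-1] -> parent references
assert rec[-1] == ':'
assert rec[:4] == ['rev-2', 'line-delta,no-eol', '417', '82']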
raise errors.KnitCorrupt(kndx._filename, "line %r: %s" % (rec, e)) version_id, options, pos, size = rec[:4] version_id = version_id try: pos = int(pos) except ValueError, e: raise errors.KnitCorrupt(kndx._filename, "invalid position on line %r: %s" % (rec, e)) try: size = int(size) except ValueError, e: raise errors.KnitCorrupt(kndx._filename, "invalid size on line %r: %s" % (rec, e)) # See kndx._cache_version # only want the _history index to reference the 1st # index entry for version_id if version_id not in cache: history_top += 1 index = history_top history.append(version_id) else: index = cache[version_id][5] cache[version_id] = (version_id, options.split(','), pos, size, tuple(parents), index) # end kndx._cache_version bzr-2.7.0/bzrlib/_knit_load_data_pyx.pyx0000644000000000000000000002463611337021464016501 0ustar 00000000000000# Copyright (C) 2007-2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Pyrex extensions to knit parsing.""" import sys from bzrlib import errors cdef extern from "stdlib.h": ctypedef unsigned size_t long int strtol(char *nptr, char **endptr, int base) cdef extern from "Python.h": int PyDict_CheckExact(object) void *PyDict_GetItem_void "PyDict_GetItem" (object p, object key) int PyDict_SetItem(object p, object key, object val) except -1 int PyList_Append(object lst, object item) except -1 object PyList_GET_ITEM(object lst, int index) int PyList_CheckExact(object) void *PyTuple_GetItem_void_void "PyTuple_GET_ITEM" (void* tpl, int index) char *PyString_AsString(object p) object PyString_FromStringAndSize(char *, int) int PyString_Size(object p) void Py_INCREF(object) cdef extern from "string.h": void *memchr(void *s, int c, size_t n) cdef int string_to_int_safe(char *s, char *end, int *out) except -1: """Convert a base10 string to an integer. This makes sure the whole string is consumed, or it raises ValueError. This is similar to how int(s) works, except you don't need a Python String object. :param s: The string to convert :param end: The character after the integer. So if the string is '12\0', this should be pointing at the '\0'. If the string was '12 ' then this should point at the ' '. :param out: This is the integer that will be returned :return: -1 if an exception is raised. 0 otherwise """ cdef char *integer_end # We can't just return the integer because of how pyrex determines when # there is an exception. 
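For reference, each record parsed by _load_data_py above is a whitespace-separated line: version id, a comma-separated options field, a byte position and size, then zero or more parent references, terminated by a lone ':'. A parent beginning with '.' is an explicit revision id; a bare integer refers back to an earlier index entry by its position in the history list. The snippet below is a minimal standalone sketch of that decoding (the sample line and ids are invented), without the kndx cache bookkeeping.

    # Minimal sketch of decoding one .kndx record line (sample data invented).
    history = ['rev-a', 'rev-b']        # ids seen earlier in the index, in order

    line = 'rev-c fulltext 1200 340 .rev-external 1 :'
    rec = line.split()
    assert len(rec) >= 5 and rec[-1] == ':', 'short or unterminated record'

    version_id = rec[0]
    options = rec[1].split(',')
    pos, size = int(rec[2]), int(rec[3])
    parents = []
    for value in rec[4:-1]:
        if value.startswith('.'):       # explicit revision id
            parents.append(value[1:])
        else:                           # index into the history list
            parents.append(history[int(value)])

    # version_id == 'rev-c', options == ['fulltext'], pos == 1200, size == 340,
    # parents == ['rev-external', 'rev-b']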
out[0] = strtol(s, &integer_end, 10) if integer_end != end: py_s = PyString_FromStringAndSize(s, end-s) raise ValueError('%r is not a valid integer' % (py_s,)) return 0 cdef class KnitIndexReader: cdef object kndx cdef object fp cdef object cache cdef object history cdef char * cur_str cdef char * end_str cdef int history_len def __init__(self, kndx, fp): self.kndx = kndx self.fp = fp self.cache = kndx._cache self.history = kndx._history self.cur_str = NULL self.end_str = NULL self.history_len = 0 cdef int validate(self) except -1: if not PyDict_CheckExact(self.cache): raise TypeError('kndx._cache must be a python dict') if not PyList_CheckExact(self.history): raise TypeError('kndx._history must be a python list') return 0 cdef object process_options(self, char *option_str, char *end): """Process the options string into a list.""" cdef char *next # This is alternative code which creates a python string and splits it. # It is "correct" and more obvious, but slower than the following code. # It can be uncommented to switch in case the other code is seen as # suspect. # options = PyString_FromStringAndSize(option_str, # end - option_str) # return options.split(',') final_options = [] while option_str < end: next = memchr(option_str, c',', end - option_str) if next == NULL: next = end next_option = PyString_FromStringAndSize(option_str, next - option_str) PyList_Append(final_options, next_option) # Move past the ',' option_str = next+1 return final_options cdef object process_parents(self, char *parent_str, char *end): cdef char *next cdef int int_parent cdef char *parent_end # Alternative, correct but slower code. # # parents = PyString_FromStringAndSize(parent_str, # end - parent_str) # real_parents = [] # for parent in parents.split(): # if parent[0].startswith('.'): # real_parents.append(parent[1:]) # else: # real_parents.append(self.history[int(parent)]) # return real_parents parents = [] while parent_str <= end: next = memchr(parent_str, c' ', end - parent_str) if next == NULL or next >= end or next == parent_str: break if parent_str[0] == c'.': # This is an explicit revision id parent_str = parent_str + 1 parent = PyString_FromStringAndSize(parent_str, next - parent_str) else: # This in an integer mapping to original string_to_int_safe(parent_str, next, &int_parent) if int_parent >= self.history_len: raise IndexError('Parent index refers to a revision which' ' does not exist yet.' 
' %d > %d' % (int_parent, self.history_len)) parent = PyList_GET_ITEM(self.history, int_parent) # PyList_GET_ITEM steals a reference Py_INCREF(parent) PyList_Append(parents, parent) parent_str = next + 1 return tuple(parents) cdef int process_one_record(self, char *start, char *end) except -1: """Take a simple string and split it into an index record.""" cdef char *version_id_str cdef int version_id_size cdef char *option_str cdef char *option_end cdef char *pos_str cdef int pos cdef char *size_str cdef int size cdef char *parent_str cdef int parent_size cdef void *cache_entry version_id_str = start option_str = memchr(version_id_str, c' ', end - version_id_str) if option_str == NULL or option_str >= end: # Short entry return 0 version_id_size = (option_str - version_id_str) # Move past the space character option_str = option_str + 1 pos_str = memchr(option_str, c' ', end - option_str) if pos_str == NULL or pos_str >= end: # Short entry return 0 option_end = pos_str pos_str = pos_str + 1 size_str = memchr(pos_str, c' ', end - pos_str) if size_str == NULL or size_str >= end: # Short entry return 0 size_str = size_str + 1 parent_str = memchr(size_str, c' ', end - size_str) if parent_str == NULL or parent_str >= end: # Missing parents return 0 parent_str = parent_str + 1 version_id = PyString_FromStringAndSize(version_id_str, version_id_size) options = self.process_options(option_str, option_end) try: string_to_int_safe(pos_str, size_str - 1, &pos) string_to_int_safe(size_str, parent_str - 1, &size) parents = self.process_parents(parent_str, end) except (ValueError, IndexError), e: py_line = PyString_FromStringAndSize(start, end - start) raise errors.KnitCorrupt(self.kndx._filename, "line %r: %s" % (py_line, e)) cache_entry = PyDict_GetItem_void(self.cache, version_id) if cache_entry == NULL: PyList_Append(self.history, version_id) index = self.history_len self.history_len = self.history_len + 1 else: # PyTuple_GetItem_void_void does *not* increment the reference # counter, but casting to does. index = PyTuple_GetItem_void_void(cache_entry, 5) PyDict_SetItem(self.cache, version_id, (version_id, options, pos, size, parents, index, )) return 1 cdef int process_next_record(self) except -1: """Process the next record in the file.""" cdef char *last cdef char *start start = self.cur_str # Find the next newline last = memchr(start, c'\n', self.end_str - start) if last == NULL: # Process until the end of the file last = self.end_str - 1 self.cur_str = self.end_str else: # The last character is right before the '\n' # And the next string is right after it self.cur_str = last + 1 last = last - 1 if last <= start or last[0] != c':': # Incomplete record return 0 return self.process_one_record(start, last) def read(self): cdef int text_size self.validate() self.kndx.check_header(self.fp) # We read the whole thing at once # TODO: jam 2007-05-09 Consider reading incrementally rather than # having to have the whole thing read up front. # we already know that calling f.readlines() versus lots of # f.readline() calls is faster. # The other possibility is to avoid a Python String here # completely. However self.fp may be a 'file-like' object # it is not guaranteed to be a real file. 
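The Pyrex reader works on the raw index text: it reads the whole file, walks it with memchr to find each newline, and only accepts a record whose last character before the newline is ':' (anything else is treated as an incomplete trailing write and skipped). The short sketch below expresses that framing pass with Python string operations, using str.find where the C path uses memchr; the sample text is invented.

    # Sketch of the record framing done by process_next_record above.
    text = 'rev-a fulltext 0 100 :\nrev-b line-delta 100 40 0 :\nrev-c fulltext 140'

    pos, records = 0, []
    while pos < len(text):
        end = text.find('\n', pos)
        if end == -1:                   # no trailing newline: runs to end of text
            end = len(text)
        line = text[pos:end]
        pos = end + 1
        if line.endswith(':'):          # complete record
            records.append(line)
        # else: incomplete record (e.g. a partial final write) is ignored

    # records keeps the first two lines; the truncated 'rev-c ...' line is dropped.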
text = self.fp.read() text_size = PyString_Size(text) self.cur_str = PyString_AsString(text) # This points to the last character in the string self.end_str = self.cur_str + text_size while self.cur_str < self.end_str: self.process_next_record() def _load_data_c(kndx, fp): """Load the knit index file into memory.""" reader = KnitIndexReader(kndx, fp) reader.read() bzr-2.7.0/bzrlib/_known_graph_py.py0000644000000000000000000003437611673635356015521 0ustar 00000000000000# Copyright (C) 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Implementation of Graph algorithms when we have already loaded everything. """ from __future__ import absolute_import from collections import deque from bzrlib import ( errors, revision, ) class _KnownGraphNode(object): """Represents a single object in the known graph.""" __slots__ = ('key', 'parent_keys', 'child_keys', 'gdfo') def __init__(self, key, parent_keys): self.key = key self.parent_keys = parent_keys self.child_keys = [] # Greatest distance from origin self.gdfo = None def __repr__(self): return '%s(%s gdfo:%s par:%s child:%s)' % ( self.__class__.__name__, self.key, self.gdfo, self.parent_keys, self.child_keys) class _MergeSortNode(object): """Information about a specific node in the merge graph.""" __slots__ = ('key', 'merge_depth', 'revno', 'end_of_merge') def __init__(self, key, merge_depth, revno, end_of_merge): self.key = key self.merge_depth = merge_depth self.revno = revno self.end_of_merge = end_of_merge class KnownGraph(object): """This is a class which assumes we already know the full graph.""" def __init__(self, parent_map, do_cache=True): """Create a new KnownGraph instance. :param parent_map: A dictionary mapping key => parent_keys """ self._nodes = {} # Maps {frozenset(revision_id, revision_id): heads} self._known_heads = {} self.do_cache = do_cache self._initialize_nodes(parent_map) self._find_gdfo() def _initialize_nodes(self, parent_map): """Populate self._nodes. After this has finished: - self._nodes will have an entry for every entry in parent_map. 
- ghosts will have a parent_keys = None, - all nodes found will also have .child_keys populated with all known child_keys, """ nodes = self._nodes for key, parent_keys in parent_map.iteritems(): if key in nodes: node = nodes[key] node.parent_keys = parent_keys else: node = _KnownGraphNode(key, parent_keys) nodes[key] = node for parent_key in parent_keys: try: parent_node = nodes[parent_key] except KeyError: parent_node = _KnownGraphNode(parent_key, None) nodes[parent_key] = parent_node parent_node.child_keys.append(key) def _find_tails(self): return [node for node in self._nodes.itervalues() if not node.parent_keys] def _find_tips(self): return [node for node in self._nodes.itervalues() if not node.child_keys] def _find_gdfo(self): nodes = self._nodes known_parent_gdfos = {} pending = [] for node in self._find_tails(): node.gdfo = 1 pending.append(node) while pending: node = pending.pop() for child_key in node.child_keys: child = nodes[child_key] if child_key in known_parent_gdfos: known_gdfo = known_parent_gdfos[child_key] + 1 present = True else: known_gdfo = 1 present = False if child.gdfo is None or node.gdfo + 1 > child.gdfo: child.gdfo = node.gdfo + 1 if known_gdfo == len(child.parent_keys): # We are the last parent updating that node, we can # continue from there pending.append(child) if present: del known_parent_gdfos[child_key] else: # Update known_parent_gdfos for a key we couldn't process known_parent_gdfos[child_key] = known_gdfo def add_node(self, key, parent_keys): """Add a new node to the graph. If this fills in a ghost, then the gdfos of all children will be updated accordingly. :param key: The node being added. If this is a duplicate, this is a no-op. :param parent_keys: The parents of the given node. :return: None (should we return if this was a ghost, etc?) """ nodes = self._nodes if key in nodes: node = nodes[key] if node.parent_keys is None: node.parent_keys = parent_keys # A ghost is being added, we can no-longer trust the heads # cache, so clear it self._known_heads.clear() else: # Make sure we compare a list to a list, as tuple != list. parent_keys = list(parent_keys) existing_parent_keys = list(node.parent_keys) if parent_keys == existing_parent_keys: return # Identical content else: raise ValueError('Parent key mismatch, existing node %s' ' has parents of %s not %s' % (key, existing_parent_keys, parent_keys)) else: node = _KnownGraphNode(key, parent_keys) nodes[key] = node parent_gdfo = 0 for parent_key in parent_keys: try: parent_node = nodes[parent_key] except KeyError: parent_node = _KnownGraphNode(parent_key, None) # Ghosts and roots have gdfo 1 parent_node.gdfo = 1 nodes[parent_key] = parent_node if parent_gdfo < parent_node.gdfo: parent_gdfo = parent_node.gdfo parent_node.child_keys.append(key) node.gdfo = parent_gdfo + 1 # Now fill the gdfo to all children # Note that this loop is slightly inefficient, in that we may visit the # same child (and its decendents) more than once, however, it is # 'efficient' in that we only walk to nodes that would be updated, # rather than all nodes # We use a deque rather than a simple list stack, to go for BFD rather # than DFD. 
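gdfo ("greatest distance from origin") as computed here is the length of the longest path from any tail (a node with no parents) up to the node, with tails counting as 1. The standalone sketch below shows the same tails-outward propagation on a small invented graph; unlike _find_gdfo above it may re-queue a child more than once, which is fine for illustration but less efficient than the seen-parent counting the real code does.

    # Standalone sketch of the gdfo pass, on an invented graph.
    # Tails start at 1; every child gets max(parent gdfo) + 1.
    parent_map = {
        'A': (),            # tail
        'B': ('A',),
        'C': ('A',),
        'D': ('B', 'C'),
    }
    children = {}
    for key, parents in parent_map.items():
        for p in parents:
            children.setdefault(p, []).append(key)

    gdfo = {}
    pending = [k for k, parents in parent_map.items() if not parents]
    for k in pending:
        gdfo[k] = 1
    while pending:
        key = pending.pop()
        for child in children.get(key, []):
            new_gdfo = gdfo[key] + 1
            if new_gdfo > gdfo.get(child, 0):
                gdfo[child] = new_gdfo
                pending.append(child)

    # gdfo == {'A': 1, 'B': 2, 'C': 2, 'D': 3}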
So that if a longer path is possible, we walk it before we # get to the final child pending = deque([node]) while pending: node = pending.popleft() next_gdfo = node.gdfo + 1 for child_key in node.child_keys: child = nodes[child_key] if child.gdfo < next_gdfo: # This child is being updated, we need to check its # children child.gdfo = next_gdfo pending.append(child) def heads(self, keys): """Return the heads from amongst keys. This is done by searching the ancestries of each key. Any key that is reachable from another key is not returned; all the others are. This operation scales with the relative depth between any two keys. It uses gdfo to avoid walking all ancestry. :param keys: An iterable of keys. :return: A set of the heads. Note that as a set there is no ordering information. Callers will need to filter their input to create order if they need it. """ candidate_nodes = dict((key, self._nodes[key]) for key in keys) if revision.NULL_REVISION in candidate_nodes: # NULL_REVISION is only a head if it is the only entry candidate_nodes.pop(revision.NULL_REVISION) if not candidate_nodes: return frozenset([revision.NULL_REVISION]) if len(candidate_nodes) < 2: # No or only one candidate return frozenset(candidate_nodes) heads_key = frozenset(candidate_nodes) # Do we have a cached result ? try: heads = self._known_heads[heads_key] return heads except KeyError: pass # Let's compute the heads seen = set() pending = [] min_gdfo = None for node in candidate_nodes.values(): if node.parent_keys: pending.extend(node.parent_keys) if min_gdfo is None or node.gdfo < min_gdfo: min_gdfo = node.gdfo nodes = self._nodes while pending: node_key = pending.pop() if node_key in seen: # node already appears in some ancestry continue seen.add(node_key) node = nodes[node_key] if node.gdfo <= min_gdfo: continue if node.parent_keys: pending.extend(node.parent_keys) heads = heads_key.difference(seen) if self.do_cache: self._known_heads[heads_key] = heads return heads def topo_sort(self): """Return the nodes in topological order. All parents must occur before all children. """ for node in self._nodes.itervalues(): if node.gdfo is None: raise errors.GraphCycleError(self._nodes) pending = self._find_tails() pending_pop = pending.pop pending_append = pending.append topo_order = [] topo_order_append = topo_order.append num_seen_parents = dict.fromkeys(self._nodes, 0) while pending: node = pending_pop() if node.parent_keys is not None: # We don't include ghost parents topo_order_append(node.key) for child_key in node.child_keys: child_node = self._nodes[child_key] seen_parents = num_seen_parents[child_key] + 1 if seen_parents == len(child_node.parent_keys): # All parents have been processed, enqueue this child pending_append(child_node) # This has been queued up, stop tracking it del num_seen_parents[child_key] else: num_seen_parents[child_key] = seen_parents # We started from the parents, so we don't need to do anymore work return topo_order def gc_sort(self): """Return a reverse topological ordering which is 'stable'. There are a few constraints: 1) Reverse topological (all children before all parents) 2) Grouped by prefix 3) 'stable' sorting, so that we get the same result, independent of machine, or extra data. To do this, we use the same basic algorithm as topo_sort, but when we aren't sure what node to access next, we sort them lexicographically. 
""" tips = self._find_tips() # Split the tips based on prefix prefix_tips = {} for node in tips: if node.key.__class__ is str or len(node.key) == 1: prefix = '' else: prefix = node.key[0] prefix_tips.setdefault(prefix, []).append(node) num_seen_children = dict.fromkeys(self._nodes, 0) result = [] for prefix in sorted(prefix_tips): pending = sorted(prefix_tips[prefix], key=lambda n:n.key, reverse=True) while pending: node = pending.pop() if node.parent_keys is None: # Ghost node, skip it continue result.append(node.key) for parent_key in sorted(node.parent_keys, reverse=True): parent_node = self._nodes[parent_key] seen_children = num_seen_children[parent_key] + 1 if seen_children == len(parent_node.child_keys): # All children have been processed, enqueue this parent pending.append(parent_node) # This has been queued up, stop tracking it del num_seen_children[parent_key] else: num_seen_children[parent_key] = seen_children return result def merge_sort(self, tip_key): """Compute the merge sorted graph output.""" from bzrlib import tsort as_parent_map = dict((node.key, node.parent_keys) for node in self._nodes.itervalues() if node.parent_keys is not None) # We intentionally always generate revnos and never force the # mainline_revisions # Strip the sequence_number that merge_sort generates return [_MergeSortNode(key, merge_depth, revno, end_of_merge) for _, key, merge_depth, revno, end_of_merge in tsort.merge_sort(as_parent_map, tip_key, mainline_revisions=None, generate_revno=True)] def get_parent_keys(self, key): """Get the parents for a key Returns a list containg the parents keys. If the key is a ghost, None is returned. A KeyError will be raised if the key is not in the graph. :param keys: Key to check (eg revision_id) :return: A list of parents """ return self._nodes[key].parent_keys def get_child_keys(self, key): """Get the children for a key Returns a list containg the children keys. A KeyError will be raised if the key is not in the graph. :param keys: Key to check (eg revision_id) :return: A list of children """ return self._nodes[key].child_keys bzr-2.7.0/bzrlib/_known_graph_pyx.pyx0000644000000000000000000011246411337021464016056 0ustar 00000000000000# Copyright (C) 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Implementation of Graph algorithms when we have already loaded everything. 
""" cdef extern from "python-compat.h": pass cdef extern from "Python.h": ctypedef int Py_ssize_t ctypedef struct PyObject: pass int PyString_CheckExact(object) int PyObject_RichCompareBool(object, object, int) int Py_LT int PyTuple_CheckExact(object) object PyTuple_New(Py_ssize_t n) Py_ssize_t PyTuple_GET_SIZE(object t) PyObject * PyTuple_GET_ITEM(object t, Py_ssize_t o) void PyTuple_SET_ITEM(object t, Py_ssize_t o, object v) int PyList_CheckExact(object) Py_ssize_t PyList_GET_SIZE(object l) PyObject * PyList_GET_ITEM(object l, Py_ssize_t o) int PyList_SetItem(object l, Py_ssize_t o, object l) except -1 int PyList_Append(object l, object v) except -1 int PyDict_CheckExact(object d) Py_ssize_t PyDict_Size(object d) except -1 PyObject * PyDict_GetItem(object d, object k) int PyDict_SetItem(object d, object k, object v) except -1 int PyDict_DelItem(object d, object k) except -1 int PyDict_Next(object d, Py_ssize_t *pos, PyObject **k, PyObject **v) void Py_INCREF(object) from collections import deque import gc from bzrlib import errors, revision cdef object NULL_REVISION NULL_REVISION = revision.NULL_REVISION cdef class _KnownGraphNode: """Represents a single object in the known graph.""" cdef object key cdef object parents cdef object children cdef public long gdfo cdef int seen cdef object extra def __init__(self, key): self.key = key self.parents = None self.children = [] # Greatest distance from origin self.gdfo = -1 self.seen = 0 self.extra = None property child_keys: def __get__(self): cdef _KnownGraphNode child keys = [] for child in self.children: PyList_Append(keys, child.key) return keys property parent_keys: def __get__(self): if self.parents is None: return None cdef _KnownGraphNode parent keys = [] for parent in self.parents: PyList_Append(keys, parent.key) return keys cdef clear_references(self): self.parents = None self.children = None def __repr__(self): cdef _KnownGraphNode node parent_keys = [] if self.parents is not None: for node in self.parents: parent_keys.append(node.key) child_keys = [] if self.children is not None: for node in self.children: child_keys.append(node.key) return '%s(%s gdfo:%s par:%s child:%s)' % ( self.__class__.__name__, self.key, self.gdfo, parent_keys, child_keys) cdef _KnownGraphNode _get_list_node(lst, Py_ssize_t pos): cdef PyObject *temp_node temp_node = PyList_GET_ITEM(lst, pos) return <_KnownGraphNode>temp_node cdef _KnownGraphNode _get_tuple_node(tpl, Py_ssize_t pos): cdef PyObject *temp_node temp_node = PyTuple_GET_ITEM(tpl, pos) return <_KnownGraphNode>temp_node def get_key(node): cdef _KnownGraphNode real_node real_node = node return real_node.key cdef object _sort_list_nodes(object lst_or_tpl, int reverse): """Sort a list of _KnownGraphNode objects. If lst_or_tpl is a list, it is allowed to mutate in place. It may also just return the input list if everything is already sorted. 
""" cdef _KnownGraphNode node1, node2 cdef int do_swap, is_tuple cdef Py_ssize_t length is_tuple = PyTuple_CheckExact(lst_or_tpl) if not (is_tuple or PyList_CheckExact(lst_or_tpl)): raise TypeError('lst_or_tpl must be a list or tuple.') length = len(lst_or_tpl) if length == 0 or length == 1: return lst_or_tpl if length == 2: if is_tuple: node1 = _get_tuple_node(lst_or_tpl, 0) node2 = _get_tuple_node(lst_or_tpl, 1) else: node1 = _get_list_node(lst_or_tpl, 0) node2 = _get_list_node(lst_or_tpl, 1) if reverse: do_swap = PyObject_RichCompareBool(node1.key, node2.key, Py_LT) else: do_swap = PyObject_RichCompareBool(node2.key, node1.key, Py_LT) if not do_swap: return lst_or_tpl if is_tuple: return (node2, node1) else: # Swap 'in-place', since lists are mutable Py_INCREF(node1) PyList_SetItem(lst_or_tpl, 1, node1) Py_INCREF(node2) PyList_SetItem(lst_or_tpl, 0, node2) return lst_or_tpl # For all other sizes, we just use 'sorted()' if is_tuple: # Note that sorted() is just list(iterable).sort() lst_or_tpl = list(lst_or_tpl) lst_or_tpl.sort(key=get_key, reverse=reverse) return lst_or_tpl cdef class _MergeSorter cdef class KnownGraph: """This is a class which assumes we already know the full graph.""" cdef public object _nodes cdef public object _known_heads cdef public int do_cache def __init__(self, parent_map, do_cache=True): """Create a new KnownGraph instance. :param parent_map: A dictionary mapping key => parent_keys """ # tests at pre-allocating the node dict actually slowed things down self._nodes = {} # Maps {sorted(revision_id, revision_id): heads} self._known_heads = {} self.do_cache = int(do_cache) # TODO: consider disabling gc since we are allocating a lot of nodes # that won't be collectable anyway. real world testing has not # shown a specific impact, yet. self._initialize_nodes(parent_map) self._find_gdfo() def __dealloc__(self): cdef _KnownGraphNode child cdef Py_ssize_t pos cdef PyObject *temp_node while PyDict_Next(self._nodes, &pos, NULL, &temp_node): child = <_KnownGraphNode>temp_node child.clear_references() cdef _KnownGraphNode _get_or_create_node(self, key): cdef PyObject *temp_node cdef _KnownGraphNode node temp_node = PyDict_GetItem(self._nodes, key) if temp_node == NULL: node = _KnownGraphNode(key) PyDict_SetItem(self._nodes, key, node) else: node = <_KnownGraphNode>temp_node return node cdef _populate_parents(self, _KnownGraphNode node, parent_keys): cdef Py_ssize_t num_parent_keys, pos cdef _KnownGraphNode parent_node num_parent_keys = len(parent_keys) # We know how many parents, so we pre allocate the tuple parent_nodes = PyTuple_New(num_parent_keys) for pos from 0 <= pos < num_parent_keys: # Note: it costs us 10ms out of 40ms to lookup all of these # parents, it doesn't seem to be an allocation overhead, # but rather a lookup overhead. There doesn't seem to be # a way around it, and that is one reason why # KnownGraphNode maintains a direct pointer to the parent # node. # We use [] because parent_keys may be a tuple or list parent_node = self._get_or_create_node(parent_keys[pos]) # PyTuple_SET_ITEM will steal a reference, so INCREF first Py_INCREF(parent_node) PyTuple_SET_ITEM(parent_nodes, pos, parent_node) PyList_Append(parent_node.children, node) node.parents = parent_nodes def _initialize_nodes(self, parent_map): """Populate self._nodes. After this has finished: - self._nodes will have an entry for every entry in parent_map. 
- ghosts will have a parent_keys = None, - all nodes found will also have child_keys populated with all known child keys, """ cdef PyObject *temp_key, *temp_parent_keys, *temp_node cdef Py_ssize_t pos cdef _KnownGraphNode node cdef _KnownGraphNode parent_node if not PyDict_CheckExact(parent_map): raise TypeError('parent_map should be a dict of {key:parent_keys}') # for key, parent_keys in parent_map.iteritems(): pos = 0 while PyDict_Next(parent_map, &pos, &temp_key, &temp_parent_keys): key = temp_key parent_keys = temp_parent_keys node = self._get_or_create_node(key) self._populate_parents(node, parent_keys) def _find_tails(self): cdef PyObject *temp_node cdef _KnownGraphNode node cdef Py_ssize_t pos tails = [] pos = 0 while PyDict_Next(self._nodes, &pos, NULL, &temp_node): node = <_KnownGraphNode>temp_node if node.parents is None or PyTuple_GET_SIZE(node.parents) == 0: node.gdfo = 1 PyList_Append(tails, node) return tails def _find_tips(self): cdef PyObject *temp_node cdef _KnownGraphNode node cdef Py_ssize_t pos tips = [] pos = 0 while PyDict_Next(self._nodes, &pos, NULL, &temp_node): node = <_KnownGraphNode>temp_node if PyList_GET_SIZE(node.children) == 0: PyList_Append(tips, node) return tips def _find_gdfo(self): cdef _KnownGraphNode node cdef _KnownGraphNode child cdef PyObject *temp cdef Py_ssize_t pos cdef int replace cdef Py_ssize_t last_item cdef long next_gdfo pending = self._find_tails() last_item = PyList_GET_SIZE(pending) - 1 while last_item >= 0: # Avoid pop followed by push, instead, peek, and replace # timing shows this is 930ms => 770ms for OOo node = _get_list_node(pending, last_item) last_item = last_item - 1 next_gdfo = node.gdfo + 1 for pos from 0 <= pos < PyList_GET_SIZE(node.children): child = _get_list_node(node.children, pos) if next_gdfo > child.gdfo: child.gdfo = next_gdfo child.seen = child.seen + 1 if child.seen == PyTuple_GET_SIZE(child.parents): # This child is populated, queue it to be walked last_item = last_item + 1 if last_item < PyList_GET_SIZE(pending): Py_INCREF(child) # SetItem steals a ref PyList_SetItem(pending, last_item, child) else: PyList_Append(pending, child) # We have queued this node, we don't need to track it # anymore child.seen = 0 def add_node(self, key, parent_keys): """Add a new node to the graph. If this fills in a ghost, then the gdfos of all children will be updated accordingly. :param key: The node being added. If this is a duplicate, this is a no-op. :param parent_keys: The parents of the given node. :return: None (should we return if this was a ghost, etc?) 
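A short sketch of add_node() in use, following the docstring above: nodes that are only ever named as parents start out as ghosts (parent_keys is None), and adding the real node later fills the ghost in, re-propagates gdfo to its existing children, and discards any cached heads() results. Python 2, assuming bzrlib is importable; the revision ids are invented, and either KnownGraph implementation behaves the same way here.

    # Sketch of add_node() filling in a ghost (Python 2; bzrlib importable).
    from bzrlib._known_graph_py import KnownGraph

    # 'rev-ghost' is only ever named as a parent, so it is created as a ghost.
    kg = KnownGraph({'rev-b': ('rev-ghost',), 'rev-c': ('rev-b',)})
    kg.get_parent_keys('rev-ghost')          # None -> it is a ghost

    # Supplying the real node replaces the ghost; gdfo values are propagated
    # to its children again and the cached heads are cleared.
    kg.add_node('rev-ghost', ())
    kg.get_parent_keys('rev-ghost')          # () -> no longer a ghost

    # Re-adding a node with identical parents is a no-op; different parents
    # raise ValueError.
    kg.add_node('rev-c', ('rev-b',))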
""" cdef PyObject *maybe_node cdef _KnownGraphNode node, parent_node, child_node cdef long parent_gdfo, next_gdfo maybe_node = PyDict_GetItem(self._nodes, key) if maybe_node != NULL: node = <_KnownGraphNode>maybe_node if node.parents is None: # We are filling in a ghost self._populate_parents(node, parent_keys) # We can't trust cached heads anymore self._known_heads.clear() else: # Ensure that the parent_key list matches existing_parent_keys = [] for parent_node in node.parents: existing_parent_keys.append(parent_node.key) # Make sure we use a list for the comparison, in case it was a # tuple, etc parent_keys = list(parent_keys) if existing_parent_keys == parent_keys: # Exact match, nothing more to do return else: raise ValueError('Parent key mismatch, existing node %s' ' has parents of %s not %s' % (key, existing_parent_keys, parent_keys)) else: node = _KnownGraphNode(key) PyDict_SetItem(self._nodes, key, node) self._populate_parents(node, parent_keys) parent_gdfo = 0 for parent_node in node.parents: if parent_node.gdfo == -1: # This is a newly introduced ghost, so it gets gdfo of 1 parent_node.gdfo = 1 if parent_gdfo < parent_node.gdfo: parent_gdfo = parent_node.gdfo node.gdfo = parent_gdfo + 1 # Now fill the gdfo to all children # Note that this loop is slightly inefficient, in that we may visit the # same child (and its decendents) more than once, however, it is # 'efficient' in that we only walk to nodes that would be updated, # rather than all nodes # We use a deque rather than a simple list stack, to go for BFD rather # than DFD. So that if a longer path is possible, we walk it before we # get to the final child pending = deque([node]) pending_popleft = pending.popleft pending_append = pending.append while pending: node = pending_popleft() next_gdfo = node.gdfo + 1 for child_node in node.children: if child_node.gdfo < next_gdfo: # This child is being updated, we need to check its # children child_node.gdfo = next_gdfo pending_append(child_node) def heads(self, keys): """Return the heads from amongst keys. This is done by searching the ancestries of each key. Any key that is reachable from another key is not returned; all the others are. This operation scales with the relative depth between any two keys. It uses gdfo to avoid walking all ancestry. :param keys: An iterable of keys. :return: A set of the heads. Note that as a set there is no ordering information. Callers will need to filter their input to create order if they need it. 
""" cdef PyObject *maybe_node cdef PyObject *maybe_heads cdef PyObject *temp_node cdef _KnownGraphNode node cdef Py_ssize_t pos, last_item cdef long min_gdfo heads_key = frozenset(keys) maybe_heads = PyDict_GetItem(self._known_heads, heads_key) if maybe_heads != NULL: return maybe_heads # Not cached, compute it ourselves candidate_nodes = {} for key in keys: maybe_node = PyDict_GetItem(self._nodes, key) if maybe_node == NULL: raise KeyError('key %s not in nodes' % (key,)) PyDict_SetItem(candidate_nodes, key, maybe_node) maybe_node = PyDict_GetItem(candidate_nodes, NULL_REVISION) if maybe_node != NULL: # NULL_REVISION is only a head if it is the only entry candidate_nodes.pop(NULL_REVISION) if not candidate_nodes: return frozenset([NULL_REVISION]) # The keys changed, so recalculate heads_key heads_key = frozenset(candidate_nodes) if PyDict_Size(candidate_nodes) < 2: return heads_key cleanup = [] pending = [] # we know a gdfo cannot be longer than a linear chain of all nodes min_gdfo = PyDict_Size(self._nodes) + 1 # Build up nodes that need to be walked, note that starting nodes are # not added to seen() pos = 0 while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node): node = <_KnownGraphNode>temp_node if node.parents is not None: pending.extend(node.parents) if node.gdfo < min_gdfo: min_gdfo = node.gdfo # Now do all the real work last_item = PyList_GET_SIZE(pending) - 1 while last_item >= 0: node = _get_list_node(pending, last_item) last_item = last_item - 1 if node.seen: # node already appears in some ancestry continue PyList_Append(cleanup, node) node.seen = 1 if node.gdfo <= min_gdfo: continue if node.parents is not None and PyTuple_GET_SIZE(node.parents) > 0: for pos from 0 <= pos < PyTuple_GET_SIZE(node.parents): parent_node = _get_tuple_node(node.parents, pos) last_item = last_item + 1 if last_item < PyList_GET_SIZE(pending): Py_INCREF(parent_node) # SetItem steals a ref PyList_SetItem(pending, last_item, parent_node) else: PyList_Append(pending, parent_node) heads = [] pos = 0 while PyDict_Next(candidate_nodes, &pos, NULL, &temp_node): node = <_KnownGraphNode>temp_node if not node.seen: PyList_Append(heads, node.key) heads = frozenset(heads) for pos from 0 <= pos < PyList_GET_SIZE(cleanup): node = _get_list_node(cleanup, pos) node.seen = 0 if self.do_cache: PyDict_SetItem(self._known_heads, heads_key, heads) return heads def topo_sort(self): """Return the nodes in topological order. All parents must occur before all children. """ # This is, for the most part, the same iteration order that we used for # _find_gdfo, consider finding a way to remove the duplication # In general, we find the 'tails' (nodes with no parents), and then # walk to the children. For children that have all of their parents # yielded, we queue up the child to be yielded as well. 
cdef _KnownGraphNode node cdef _KnownGraphNode child cdef PyObject *temp cdef Py_ssize_t pos cdef int replace cdef Py_ssize_t last_item pending = self._find_tails() if PyList_GET_SIZE(pending) == 0 and len(self._nodes) > 0: raise errors.GraphCycleError(self._nodes) topo_order = [] last_item = PyList_GET_SIZE(pending) - 1 while last_item >= 0: # Avoid pop followed by push, instead, peek, and replace # timing shows this is 930ms => 770ms for OOo node = _get_list_node(pending, last_item) last_item = last_item - 1 if node.parents is not None: # We don't include ghost parents PyList_Append(topo_order, node.key) for pos from 0 <= pos < PyList_GET_SIZE(node.children): child = _get_list_node(node.children, pos) if child.gdfo == -1: # We know we have a graph cycle because a node has a parent # which we couldn't find raise errors.GraphCycleError(self._nodes) child.seen = child.seen + 1 if child.seen == PyTuple_GET_SIZE(child.parents): # All parents of this child have been yielded, queue this # one to be yielded as well last_item = last_item + 1 if last_item < PyList_GET_SIZE(pending): Py_INCREF(child) # SetItem steals a ref PyList_SetItem(pending, last_item, child) else: PyList_Append(pending, child) # We have queued this node, we don't need to track it # anymore child.seen = 0 # We started from the parents, so we don't need to do anymore work return topo_order def gc_sort(self): """Return a reverse topological ordering which is 'stable'. There are a few constraints: 1) Reverse topological (all children before all parents) 2) Grouped by prefix 3) 'stable' sorting, so that we get the same result, independent of machine, or extra data. To do this, we use the same basic algorithm as topo_sort, but when we aren't sure what node to access next, we sort them lexicographically. """ cdef PyObject *temp cdef Py_ssize_t pos, last_item cdef _KnownGraphNode node, node2, parent_node tips = self._find_tips() # Split the tips based on prefix prefix_tips = {} for pos from 0 <= pos < PyList_GET_SIZE(tips): node = _get_list_node(tips, pos) if PyString_CheckExact(node.key) or len(node.key) == 1: prefix = '' else: prefix = node.key[0] temp = PyDict_GetItem(prefix_tips, prefix) if temp == NULL: prefix_tips[prefix] = [node] else: tip_nodes = temp PyList_Append(tip_nodes, node) result = [] for prefix in sorted(prefix_tips): temp = PyDict_GetItem(prefix_tips, prefix) assert temp != NULL tip_nodes = temp pending = _sort_list_nodes(tip_nodes, 1) last_item = PyList_GET_SIZE(pending) - 1 while last_item >= 0: node = _get_list_node(pending, last_item) last_item = last_item - 1 if node.parents is None: # Ghost continue PyList_Append(result, node.key) # Sorting the parent keys isn't strictly necessary for stable # sorting of a given graph. 
But it does help minimize the # differences between graphs # For bzr.dev ancestry: # 4.73ms no sort # 7.73ms RichCompareBool sort parents = _sort_list_nodes(node.parents, 1) for pos from 0 <= pos < len(parents): if PyTuple_CheckExact(parents): parent_node = _get_tuple_node(parents, pos) else: parent_node = _get_list_node(parents, pos) # TODO: GraphCycle detection parent_node.seen = parent_node.seen + 1 if (parent_node.seen == PyList_GET_SIZE(parent_node.children)): # All children have been processed, queue up this # parent last_item = last_item + 1 if last_item < PyList_GET_SIZE(pending): Py_INCREF(parent_node) # SetItem steals a ref PyList_SetItem(pending, last_item, parent_node) else: PyList_Append(pending, parent_node) parent_node.seen = 0 return result def merge_sort(self, tip_key): """Compute the merge sorted graph output.""" cdef _MergeSorter sorter # TODO: consider disabling gc since we are allocating a lot of nodes # that won't be collectable anyway. real world testing has not # shown a specific impact, yet. sorter = _MergeSorter(self, tip_key) return sorter.topo_order() def get_parent_keys(self, key): """Get the parents for a key Returns a list containg the parents keys. If the key is a ghost, None is returned. A KeyError will be raised if the key is not in the graph. :param keys: Key to check (eg revision_id) :return: A list of parents """ return self._nodes[key].parent_keys def get_child_keys(self, key): """Get the children for a key Returns a list containg the children keys. A KeyError will be raised if the key is not in the graph. :param keys: Key to check (eg revision_id) :return: A list of children """ return self._nodes[key].child_keys cdef class _MergeSortNode: """Tracks information about a node during the merge_sort operation.""" # Public api cdef public object key cdef public long merge_depth cdef public object end_of_merge # True/False Is this the end of the current merge # Private api, used while computing the information cdef _KnownGraphNode left_parent cdef _KnownGraphNode left_pending_parent cdef object pending_parents # list of _KnownGraphNode for non-left parents cdef long _revno_first cdef long _revno_second cdef long _revno_last # TODO: turn these into flag/bit fields rather than individual members cdef int is_first_child # Is this the first child? cdef int seen_by_child # A child node has seen this parent cdef int completed # Fully Processed def __init__(self, key): self.key = key self.merge_depth = -1 self.left_parent = None self.left_pending_parent = None self.pending_parents = None self._revno_first = -1 self._revno_second = -1 self._revno_last = -1 self.is_first_child = 0 self.seen_by_child = 0 self.completed = 0 def __repr__(self): return '%s(%s depth:%s rev:%s,%s,%s first:%s seen:%s)' % ( self.__class__.__name__, self.key, self.merge_depth, self._revno_first, self._revno_second, self._revno_last, self.is_first_child, self.seen_by_child) cdef int has_pending_parents(self): # cannot_raise if self.left_pending_parent is not None or self.pending_parents: return 1 return 0 cdef object _revno(self): if self._revno_first == -1: if self._revno_second != -1: raise RuntimeError('Something wrong with: %s' % (self,)) return (self._revno_last,) else: return (self._revno_first, self._revno_second, self._revno_last) property revno: def __get__(self): return self._revno() cdef class _MergeSorter: """This class does the work of computing the merge_sort ordering. 
We have some small advantages, in that we get all the extra information that KnownGraph knows, like knowing the child lists, etc. """ # Current performance numbers for merge_sort(bzr_dev_parent_map): # 302ms tsort.merge_sort() # 91ms graph.KnownGraph().merge_sort() # 40ms kg.merge_sort() cdef KnownGraph graph cdef object _depth_first_stack # list cdef Py_ssize_t _last_stack_item # offset to last item on stack # cdef object _ms_nodes # dict of key => _MergeSortNode cdef object _revno_to_branch_count # {revno => num child branches} cdef object _scheduled_nodes # List of nodes ready to be yielded def __init__(self, known_graph, tip_key): cdef _KnownGraphNode node self.graph = known_graph # self._ms_nodes = {} self._revno_to_branch_count = {} self._depth_first_stack = [] self._last_stack_item = -1 self._scheduled_nodes = [] if (tip_key is not None and tip_key != NULL_REVISION and tip_key != (NULL_REVISION,)): node = self.graph._nodes[tip_key] self._push_node(node, 0) cdef _MergeSortNode _get_ms_node(self, _KnownGraphNode node): cdef PyObject *temp_node cdef _MergeSortNode ms_node if node.extra is None: ms_node = _MergeSortNode(node.key) node.extra = ms_node else: ms_node = <_MergeSortNode>node.extra return ms_node cdef _push_node(self, _KnownGraphNode node, long merge_depth): cdef _KnownGraphNode parent_node cdef _MergeSortNode ms_node, ms_parent_node cdef Py_ssize_t pos ms_node = self._get_ms_node(node) ms_node.merge_depth = merge_depth if node.parents is None: raise RuntimeError('ghost nodes should not be pushed' ' onto the stack: %s' % (node,)) if PyTuple_GET_SIZE(node.parents) > 0: parent_node = _get_tuple_node(node.parents, 0) ms_node.left_parent = parent_node if parent_node.parents is None: # left-hand ghost ms_node.left_pending_parent = None ms_node.left_parent = None else: ms_node.left_pending_parent = parent_node if PyTuple_GET_SIZE(node.parents) > 1: ms_node.pending_parents = [] for pos from 1 <= pos < PyTuple_GET_SIZE(node.parents): parent_node = _get_tuple_node(node.parents, pos) if parent_node.parents is None: # ghost continue PyList_Append(ms_node.pending_parents, parent_node) ms_node.is_first_child = 1 if ms_node.left_parent is not None: ms_parent_node = self._get_ms_node(ms_node.left_parent) if ms_parent_node.seen_by_child: ms_node.is_first_child = 0 ms_parent_node.seen_by_child = 1 self._last_stack_item = self._last_stack_item + 1 if self._last_stack_item < PyList_GET_SIZE(self._depth_first_stack): Py_INCREF(node) # SetItem steals a ref PyList_SetItem(self._depth_first_stack, self._last_stack_item, node) else: PyList_Append(self._depth_first_stack, node) cdef _pop_node(self): cdef PyObject *temp cdef _MergeSortNode ms_node, ms_parent_node, ms_prev_node cdef _KnownGraphNode node, parent_node, prev_node node = _get_list_node(self._depth_first_stack, self._last_stack_item) ms_node = <_MergeSortNode>node.extra self._last_stack_item = self._last_stack_item - 1 if ms_node.left_parent is not None: # Assign the revision number from the left-hand parent ms_parent_node = <_MergeSortNode>ms_node.left_parent.extra if ms_node.is_first_child: # First child just increments the final digit ms_node._revno_first = ms_parent_node._revno_first ms_node._revno_second = ms_parent_node._revno_second ms_node._revno_last = ms_parent_node._revno_last + 1 else: # Not the first child, make a new branch # (mainline_revno, branch_count, 1) if ms_parent_node._revno_first == -1: # Mainline ancestor, the increment is on the last digit base_revno = ms_parent_node._revno_last else: base_revno = 
ms_parent_node._revno_first temp = PyDict_GetItem(self._revno_to_branch_count, base_revno) if temp == NULL: branch_count = 1 else: branch_count = (temp) + 1 PyDict_SetItem(self._revno_to_branch_count, base_revno, branch_count) ms_node._revno_first = base_revno ms_node._revno_second = branch_count ms_node._revno_last = 1 else: temp = PyDict_GetItem(self._revno_to_branch_count, 0) if temp == NULL: # The first root node doesn't have a 3-digit revno root_count = 0 ms_node._revno_first = -1 ms_node._revno_second = -1 ms_node._revno_last = 1 else: root_count = (temp) + 1 ms_node._revno_first = 0 ms_node._revno_second = root_count ms_node._revno_last = 1 PyDict_SetItem(self._revno_to_branch_count, 0, root_count) ms_node.completed = 1 if PyList_GET_SIZE(self._scheduled_nodes) == 0: # The first scheduled node is always the end of merge ms_node.end_of_merge = True else: prev_node = _get_list_node(self._scheduled_nodes, PyList_GET_SIZE(self._scheduled_nodes) - 1) ms_prev_node = <_MergeSortNode>prev_node.extra if ms_prev_node.merge_depth < ms_node.merge_depth: # The previously pushed node is to our left, so this is the end # of this right-hand chain ms_node.end_of_merge = True elif (ms_prev_node.merge_depth == ms_node.merge_depth and prev_node not in node.parents): # The next node is not a direct parent of this node ms_node.end_of_merge = True else: ms_node.end_of_merge = False PyList_Append(self._scheduled_nodes, node) cdef _schedule_stack(self): cdef _KnownGraphNode last_node, next_node cdef _MergeSortNode ms_node, ms_last_node, ms_next_node cdef long next_merge_depth ordered = [] while self._last_stack_item >= 0: # Peek at the last item on the stack last_node = _get_list_node(self._depth_first_stack, self._last_stack_item) if last_node.gdfo == -1: # if _find_gdfo skipped a node, that means there is a graph # cycle, error out now raise errors.GraphCycleError(self.graph._nodes) ms_last_node = <_MergeSortNode>last_node.extra if not ms_last_node.has_pending_parents(): # Processed all parents, pop this node self._pop_node() continue while ms_last_node.has_pending_parents(): if ms_last_node.left_pending_parent is not None: # recurse depth first into the primary parent next_node = ms_last_node.left_pending_parent ms_last_node.left_pending_parent = None else: # place any merges in right-to-left order for scheduling # which gives us left-to-right order after we reverse # the scheduled queue. # Note: This has the effect of allocating common-new # revisions to the right-most subtree rather than the # left most, which will display nicely (you get # smaller trees at the top of the combined merge). next_node = ms_last_node.pending_parents.pop() ms_next_node = self._get_ms_node(next_node) if ms_next_node.completed: # this parent was completed by a child on the # call stack. skip it. continue # otherwise transfer it from the source graph into the # top of the current depth first search stack. if next_node is ms_last_node.left_parent: next_merge_depth = ms_last_node.merge_depth else: next_merge_depth = ms_last_node.merge_depth + 1 self._push_node(next_node, next_merge_depth) # and do not continue processing parents until this 'call' # has recursed. break cdef topo_order(self): cdef _MergeSortNode ms_node cdef _KnownGraphNode node cdef Py_ssize_t pos cdef PyObject *temp_key, *temp_node # Note: allocating a _MergeSortNode and deallocating it for all nodes # costs approx 8.52ms (21%) of the total runtime # We might consider moving the attributes into the base # KnownGraph object. 
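In use, merge_sort(tip_key) yields one _MergeSortNode per reachable revision, newest first, carrying the dotted revision numbers bzr shows in log output: mainline revisions get single-element revnos, merged revisions get (base, branch, count) triples, merge_depth records how deeply nested the merge is, and end_of_merge marks the oldest revision of each merged chain. A small Python 2 sketch follows, assuming bzrlib is importable; the revision ids are invented and the commented output follows from the revno rules above.

    # Sketch of merge_sort() output (Python 2; bzrlib importable).
    # 'rev-c' is a branch merged back into the mainline a..b..d.
    from bzrlib._known_graph_py import KnownGraph

    kg = KnownGraph({
        'rev-a': (),
        'rev-b': ('rev-a',),
        'rev-c': ('rev-a',),
        'rev-d': ('rev-b', 'rev-c'),
    })
    for ms in kg.merge_sort('rev-d'):
        # Expected (key, merge_depth, revno, end_of_merge), newest first:
        #   rev-d  0  (3,)       False
        #   rev-c  1  (1, 1, 1)  True   <- merged branch gets a dotted revno
        #   rev-b  0  (2,)       False
        #   rev-a  0  (1,)       True
        print ms.key, ms.merge_depth, ms.revno, ms.end_of_merge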
self._schedule_stack() # We've set up the basic schedule, now we can continue processing the # output. # Note: This final loop costs us 40.0ms => 28.8ms (11ms, 25%) on # bzr.dev, to convert the internal Object representation into a # Tuple representation... # 2ms is walking the data and computing revno tuples # 7ms is computing the return tuple # 4ms is PyList_Append() ordered = [] # output the result in reverse order, and separate the generated info for pos from PyList_GET_SIZE(self._scheduled_nodes) > pos >= 0: node = _get_list_node(self._scheduled_nodes, pos) ms_node = <_MergeSortNode>node.extra PyList_Append(ordered, ms_node) node.extra = None # Clear out the scheduled nodes now that we're done self._scheduled_nodes = [] return ordered bzr-2.7.0/bzrlib/_patiencediff_c.c0000644000000000000000000011412711475665347015205 0ustar 00000000000000/* Copyright (C) 2007, 2010 Canonical Ltd This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Function equate_lines based on bdiff.c from Mercurial. Copyright (C) 2005, 2006 Matt Mackall Functions unique_lcs/recurse_matches based on _patiencediff_py.py. Copyright (C) 2005 Bram Cohen, Copyright (C) 2005, 2006 Canonical Ltd */ #include #include #include #include "python-compat.h" #if defined(__GNUC__) # define inline __inline__ #elif defined(_MSC_VER) # define inline __inline #else # define inline #endif #define MIN(a, b) (((a) > (b)) ? (b) : (a)) #define MAX(a, b) (((a) > (b)) ? (a) : (b)) #define SENTINEL -1 /* malloc returns NULL on some platforms if you try to allocate nothing, * causing and * . On glibc it passes, but * let's make it fail to aid testing. */ #define guarded_malloc(x) ( (x) ? 
malloc(x) : NULL ) enum { OP_EQUAL = 0, OP_INSERT, OP_DELETE, OP_REPLACE }; /* values from this array need to correspont to the order of the enum above */ static char *opcode_names[] = { "equal", "insert", "delete", "replace", }; struct line { long hash; /* hash code of the string/object */ Py_ssize_t next; /* next line from the same equivalence class */ Py_ssize_t equiv; /* equivalence class */ PyObject *data; }; struct bucket { Py_ssize_t a_head; /* first item in `a` from this equivalence class */ Py_ssize_t a_count; Py_ssize_t b_head; /* first item in `b` from this equivalence class */ Py_ssize_t b_count; Py_ssize_t a_pos; Py_ssize_t b_pos; }; struct hashtable { Py_ssize_t last_a_pos; Py_ssize_t last_b_pos; Py_ssize_t size; struct bucket *table; }; struct matching_line { Py_ssize_t a; /* index of the line in `a` */ Py_ssize_t b; /* index of the line in `b` */ }; struct matching_block { Py_ssize_t a; /* index of the first line in `a` */ Py_ssize_t b; /* index of the first line in `b` */ Py_ssize_t len; /* length of the block */ }; struct matching_blocks { struct matching_block *matches; Py_ssize_t count; }; struct opcode { int tag; Py_ssize_t i1; Py_ssize_t i2; Py_ssize_t j1; Py_ssize_t j2; }; typedef struct { PyObject_HEAD Py_ssize_t asize; Py_ssize_t bsize; struct line *a; struct line *b; struct hashtable hashtable; Py_ssize_t *backpointers; } PatienceSequenceMatcher; static inline Py_ssize_t bisect_left(Py_ssize_t *list, Py_ssize_t item, Py_ssize_t lo, Py_ssize_t hi) { while (lo < hi) { Py_ssize_t mid = lo / 2 + hi / 2 + (lo % 2 + hi % 2) / 2; if (list[mid] < item) lo = mid + 1; else hi = mid; } return lo; } static inline int compare_lines(struct line *a, struct line *b) { return ((a->hash != b->hash) || PyObject_Compare(a->data, b->data)); } static inline int find_equivalence_class(struct bucket *hashtable, Py_ssize_t hsize, struct line *lines, struct line *ref_lines, Py_ssize_t i) { Py_ssize_t j; for (j = lines[i].hash & hsize; hashtable[j].b_head != SENTINEL; j = (j + 1) & hsize) { if (!compare_lines(lines + i, ref_lines + hashtable[j].b_head)) { break; } } return j; } static int equate_lines(struct hashtable *result, struct line *lines_a, struct line *lines_b, Py_ssize_t asize, Py_ssize_t bsize) { Py_ssize_t i, j, hsize; struct bucket *hashtable; /* check for overflow, we need the table to be at least bsize+1 */ if (bsize == PY_SSIZE_T_MAX) { PyErr_SetNone(PyExc_OverflowError); return 0; } /* build a hash table of the next highest power of 2 */ hsize = 1; while (hsize < bsize + 1) hsize *= 2; /* can't be 0 */ hashtable = (struct bucket *) guarded_malloc(sizeof(struct bucket) * hsize); if (hashtable == NULL) { PyErr_NoMemory(); return 0; } /* initialise the hashtable */ for (i = 0; i < hsize; i++) { hashtable[i].a_count = 0; hashtable[i].b_count = 0; hashtable[i].a_head = SENTINEL; hashtable[i].b_head = SENTINEL; } hsize--; /* add lines from lines_b to the hash table chains. iterating backwards so the matching lines are sorted to the linked list by the line number (because we are adding new lines to the head of the list) */ for (i = bsize - 1; i >= 0; i--) { /* find the first hashtable entry, which is either empty or contains the same line as lines_b[i] */ j = find_equivalence_class(hashtable, hsize, lines_b, lines_b, i); /* set the equivalence class */ lines_b[i].equiv = j; /* add to the head of the equivalence class */ lines_b[i].next = hashtable[j].b_head; hashtable[j].b_head = i; hashtable[j].b_count++; } /* match items from lines_a to their equivalence class in lines_b. 
again, iterating backwards for the right order of the linked lists */ for (i = asize - 1; i >= 0; i--) { /* find the first hash entry, which is either empty or contains the same line as lines_a[i] */ j = find_equivalence_class(hashtable, hsize, lines_a, lines_b, i); /* set the equivalence class, even if we are not interested in this line, because the values are not pre-filled */ lines_a[i].equiv = j; /* we are not interested in lines which are not also in lines_b */ if (hashtable[j].b_head == SENTINEL) continue; /* add to the head of the equivalence class */ lines_a[i].next = hashtable[j].a_head; hashtable[j].a_head = i; hashtable[j].a_count++; } result->last_a_pos = -1; result->last_b_pos = -1; result->size = hsize + 1; result->table = hashtable; return 1; } /* Finds longest common subsequence of unique lines in a[alo:ahi] and b[blo:bhi]. Parameter backpointers must have allocated memory for at least 4 * (bhi - blo) ints. */ Py_ssize_t unique_lcs(struct matching_line *answer, struct hashtable *hashtable, Py_ssize_t *backpointers, struct line *lines_a, struct line *lines_b, Py_ssize_t alo, Py_ssize_t blo, Py_ssize_t ahi, Py_ssize_t bhi) { Py_ssize_t i, k, equiv, apos, bpos, norm_apos, norm_bpos, bsize, stacksize; Py_ssize_t *stacks, *lasts, *btoa; struct bucket *h; k = 0; stacksize = 0; bsize = bhi - blo; h = hashtable->table; /* "unpack" the allocated memory */ stacks = backpointers + bsize; lasts = stacks + bsize; btoa = lasts + bsize; /* initialise the backpointers */ for (i = 0; i < bsize; i++) backpointers[i] = SENTINEL; if (hashtable->last_a_pos == -1 || hashtable->last_a_pos > alo) for (i = 0; i < hashtable->size; i++) h[i].a_pos = h[i].a_head; hashtable->last_a_pos = alo; if (hashtable->last_b_pos == -1 || hashtable->last_b_pos > blo) for (i = 0; i < hashtable->size; i++) h[i].b_pos = h[i].b_head; hashtable->last_b_pos = blo; for (bpos = blo; bpos < bhi; bpos++) { equiv = lines_b[bpos].equiv; /* no lines in a or b */ if (h[equiv].a_count == 0 || h[equiv].b_count == 0) continue; /* find an unique line in lines_a that matches lines_b[bpos] if we find more than one line within the range alo:ahi, jump to the next line from lines_b immediately */ apos = SENTINEL; /* loop through all lines in the linked list */ for (i = h[equiv].a_pos; i != SENTINEL; i = lines_a[i].next) { /* the index is lower than alo, continue to the next line */ if (i < alo) { h[equiv].a_pos = i; continue; } /* the index is higher than ahi, stop searching */ if (i >= ahi) break; /* if the line is within our range, check if it's a duplicate */ if (apos != SENTINEL) goto nextb; /* save index to the line */ apos = i; } /* this line has no equivalent in lines_a[alo:ahi] */ if (apos == SENTINEL) goto nextb; /* check for duplicates of this line in lines_b[blo:bhi] */ /* loop through all lines in the linked list */ for (i = h[equiv].b_pos; i != SENTINEL; i = lines_b[i].next) { /* the index is lower than blo, continue to the next line */ if (i < blo) { h[equiv].b_pos = i; continue; } /* the index is higher than bhi, stop searching */ if (i >= bhi) break; /* if this isn't the line with started with and it's within our range, it's a duplicate */ if (i != bpos) goto nextb; } /* use normalised indexes ([0,ahi-alo) instead of [alo,ahi)) for the patience sorting algorithm */ norm_bpos = bpos - blo; norm_apos = apos - alo; btoa[norm_bpos] = norm_apos; /* Ok, how does this work... We have a list of matching lines from two lists, a and b. These matches are stored in variable `btoa`. 
As we are iterating over this table by bpos, the lines from b already form an increasing sequence. We need to "sort" also the lines from a using the patience sorting algorithm, ignoring the lines which would need to be swapped. http://en.wikipedia.org/wiki/Patience_sorting For each pair of lines, we need to place the line from a on either an existing pile that has higher value on the top or create a new pile. Variable `stacks` represents the tops of these piles and in variable `lasts` we store the lines from b, that correspond to the lines from a in `stacks`. Whenever we place a new line on top of a pile, we store a backpointer to the line (b) from top of the previous pile. This means that after the loop, variable `backpointers` will contain an index to the previous matching lines that forms an increasing sequence (over both indexes a and b) with the current matching lines. If either index a or b of the previous matching lines would be higher than indexes of the current one or if the indexes of the current one are 0, it will contain SENTINEL. To construct the LCS, we will just need to follow these backpointers from the top of the last pile and stop when we reach SENTINEL. */ /* as an optimization, check if the next line comes at the end, because it usually does */ if (stacksize && stacks[stacksize - 1] < norm_apos) k = stacksize; /* as an optimization, check if the next line comes right after the previous line, because usually it does */ else if (stacksize && (stacks[k] < norm_apos) && (k == stacksize - 1 || stacks[k + 1] > norm_apos)) k += 1; else k = bisect_left(stacks, norm_apos, 0, stacksize); if (k > 0) backpointers[norm_bpos] = lasts[k - 1]; if (k < stacksize) { stacks[k] = norm_apos; lasts[k] = norm_bpos; } else { stacks[stacksize] = norm_apos; lasts[stacksize] = norm_bpos; stacksize += 1; } nextb: ; } if (stacksize == 0) return 0; /* backtrace the structures to find the LCS */ i = 0; k = lasts[stacksize - 1]; while (k != SENTINEL) { answer[i].a = btoa[k]; answer[i].b = k; k = backpointers[k]; i++; } return i; } /* Adds a new line to the list of matching blocks, either extending the current block or adding a new one. 
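For example, the consecutive matches (3,5), (4,6), (5,7) collapse into a single block with a = 3, b = 5, len = 3.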
*/ static inline void add_matching_line(struct matching_blocks *answer, Py_ssize_t a, Py_ssize_t b) { Py_ssize_t last_index = answer->count - 1; if ((last_index >= 0) && (a == answer->matches[last_index].a + answer->matches[last_index].len) && (b == answer->matches[last_index].b + answer->matches[last_index].len)) { /* enlarge the last block */ answer->matches[last_index].len++; } else { /* create a new block */ last_index++; answer->matches[last_index].a = a; answer->matches[last_index].b = b; answer->matches[last_index].len = 1; answer->count++; } } static int recurse_matches(struct matching_blocks *answer, struct hashtable *hashtable, Py_ssize_t *backpointers, struct line *a, struct line *b, Py_ssize_t alo, Py_ssize_t blo, Py_ssize_t ahi, Py_ssize_t bhi, int maxrecursion) { int res; Py_ssize_t new, last_a_pos, last_b_pos, lcs_size, nahi, nbhi, i, apos, bpos; struct matching_line *lcs; if (maxrecursion < 0) return 1; if (alo == ahi || blo == bhi) return 1; new = 0; last_a_pos = alo - 1; last_b_pos = blo - 1; lcs = (struct matching_line *)guarded_malloc(sizeof(struct matching_line) * (bhi - blo)); if (lcs == NULL) return 0; lcs_size = unique_lcs(lcs, hashtable, backpointers, a, b, alo, blo, ahi, bhi); /* recurse between lines which are unique in each file and match */ for (i = lcs_size - 1; i >= 0; i--) { apos = alo + lcs[i].a; bpos = blo + lcs[i].b; if (last_a_pos + 1 != apos || last_b_pos + 1 != bpos) { res = recurse_matches(answer, hashtable, backpointers, a, b, last_a_pos + 1, last_b_pos + 1, apos, bpos, maxrecursion - 1); if (!res) goto error; } last_a_pos = apos; last_b_pos = bpos; add_matching_line(answer, apos, bpos); new = 1; } free(lcs); lcs = NULL; /* find matches between the last match and the end */ if (new > 0) { res = recurse_matches(answer, hashtable, backpointers, a, b, last_a_pos + 1, last_b_pos + 1, ahi, bhi, maxrecursion - 1); if (!res) goto error; } /* find matching lines at the very beginning */ else if (a[alo].equiv == b[blo].equiv) { while (alo < ahi && blo < bhi && a[alo].equiv == b[blo].equiv) add_matching_line(answer, alo++, blo++); res = recurse_matches(answer, hashtable, backpointers, a, b, alo, blo, ahi, bhi, maxrecursion - 1); if (!res) goto error; } /* find matching lines at the very end */ else if (a[ahi - 1].equiv == b[bhi - 1].equiv) { nahi = ahi - 1; nbhi = bhi - 1; while (nahi > alo && nbhi > blo && a[nahi - 1].equiv == b[nbhi - 1].equiv) { nahi--; nbhi--; } res = recurse_matches(answer, hashtable, backpointers, a, b, last_a_pos + 1, last_b_pos + 1, nahi, nbhi, maxrecursion - 1); if (!res) goto error; for (i = 0; i < ahi - nahi; i++) add_matching_line(answer, nahi + i, nbhi + i); } return 1; error: free(lcs); return 0; } static void delete_lines(struct line *lines, Py_ssize_t size) { struct line *line = lines; while (size-- > 0) { Py_XDECREF(line->data); line++; } free(lines); } static Py_ssize_t load_lines(PyObject *orig, struct line **lines) { Py_ssize_t size, i; struct line *line; PyObject *seq, *item; seq = PySequence_Fast(orig, "sequence expected"); if (seq == NULL) { return -1; } size = PySequence_Fast_GET_SIZE(seq); if (size == 0) { Py_DECREF(seq); return 0; } /* Allocate a memory block for line data, initialized to 0 */ line = *lines = (struct line *)calloc(size, sizeof(struct line)); if (line == NULL) { PyErr_NoMemory(); Py_DECREF(seq); return -1; } for (i = 0; i < size; i++) { item = PySequence_Fast_GET_ITEM(seq, i); Py_INCREF(item); line->data = item; line->hash = PyObject_Hash(item); if (line->hash == (-1)) { /* Propogate the hash exception 
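raised by PyObject_Hash; flagging size as -1 makes the cleanup code below release the item references taken so far.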
*/ size = -1; goto cleanup; } line->next = SENTINEL; line++; } cleanup: Py_DECREF(seq); if (size == -1) { /* Error -- cleanup unused object references */ delete_lines(*lines, i); *lines = NULL; } return size; } static PyObject * py_unique_lcs(PyObject *self, PyObject *args) { PyObject *aseq, *bseq, *res, *item; Py_ssize_t asize, bsize, i, nmatches, *backpointers = NULL; struct line *a = NULL, *b = NULL; struct matching_line *matches = NULL; struct hashtable hashtable; if (!PyArg_ParseTuple(args, "OO", &aseq, &bseq)) return NULL; hashtable.table = NULL; asize = load_lines(aseq, &a); bsize = load_lines(bseq, &b); if (asize == -1 || bsize == -1) goto error; if (!equate_lines(&hashtable, a, b, asize, bsize)) goto error; if (bsize > 0) { matches = (struct matching_line *)guarded_malloc(sizeof(struct matching_line) * bsize); if (matches == NULL) goto error; backpointers = (Py_ssize_t *)guarded_malloc(sizeof(Py_ssize_t) * bsize * 4); if (backpointers == NULL) goto error; } nmatches = unique_lcs(matches, &hashtable, backpointers, a, b, 0, 0, asize, bsize); res = PyList_New(nmatches); for (i = 0; i < nmatches; i++) { #if PY_VERSION_HEX < 0x02050000 item = Py_BuildValue("ii", matches[nmatches - i - 1].a, matches[nmatches - i - 1].b); #else item = Py_BuildValue("nn", matches[nmatches - i - 1].a, matches[nmatches - i - 1].b); #endif if (item == NULL) goto error; if (PyList_SetItem(res, i, item) != 0) goto error; } free(backpointers); free(matches); free(hashtable.table); delete_lines(b, bsize); delete_lines(a, asize); return res; error: free(backpointers); free(matches); free(hashtable.table); delete_lines(b, bsize); delete_lines(a, asize); return NULL; } static PyObject * py_recurse_matches(PyObject *self, PyObject *args) { PyObject *aseq, *bseq, *item, *answer; int maxrecursion, res; Py_ssize_t i, j, asize, bsize, alo, blo, ahi, bhi; Py_ssize_t *backpointers = NULL; struct line *a = NULL, *b = NULL; struct hashtable hashtable; struct matching_blocks matches; #if PY_VERSION_HEX < 0x02050000 if (!PyArg_ParseTuple(args, "OOiiiiOi", &aseq, &bseq, &alo, &blo, &ahi, &bhi, &answer, &maxrecursion)) #else if (!PyArg_ParseTuple(args, "OOnnnnOi", &aseq, &bseq, &alo, &blo, &ahi, &bhi, &answer, &maxrecursion)) #endif return NULL; hashtable.table = NULL; matches.matches = NULL; asize = load_lines(aseq, &a); bsize = load_lines(bseq, &b); if (asize == -1 || bsize == -1) goto error; if (!equate_lines(&hashtable, a, b, asize, bsize)) goto error; matches.count = 0; if (bsize > 0) { matches.matches = (struct matching_block *)guarded_malloc(sizeof(struct matching_block) * bsize); if (matches.matches == NULL) goto error; backpointers = (Py_ssize_t *)guarded_malloc(sizeof(Py_ssize_t) * bsize * 4); if (backpointers == NULL) goto error; } else { matches.matches = NULL; backpointers = NULL; } res = recurse_matches(&matches, &hashtable, backpointers, a, b, alo, blo, ahi, bhi, maxrecursion); if (!res) goto error; for (i = 0; i < matches.count; i++) { for (j = 0; j < matches.matches[i].len; j++) { #if PY_VERSION_HEX < 0x02050000 item = Py_BuildValue("ii", matches.matches[i].a + j, matches.matches[i].b + j); #else item = Py_BuildValue("nn", matches.matches[i].a + j, matches.matches[i].b + j); #endif if (item == NULL) goto error; if (PyList_Append(answer, item) != 0) goto error; } } free(backpointers); free(matches.matches); free(hashtable.table); delete_lines(b, bsize); delete_lines(a, asize); Py_RETURN_NONE; error: free(backpointers); free(matches.matches); free(hashtable.table); delete_lines(b, bsize); delete_lines(a, asize); 
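/* `answer` is a caller-owned list, so it is not released here; any pairs already appended to it simply remain. Only the scratch buffers and the loaded line arrays are freed. */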
return NULL; } static PyObject * PatienceSequenceMatcher_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyObject *junk, *a, *b; PatienceSequenceMatcher *self; self = (PatienceSequenceMatcher *)type->tp_alloc(type, 0); if (self != NULL) { if (!PyArg_ParseTuple(args, "OOO", &junk, &a, &b)) { Py_DECREF(self); return NULL; } self->asize = load_lines(a, &(self->a)); self->bsize = load_lines(b, &(self->b)); if (self->asize == -1 || self->bsize == -1) { Py_DECREF(self); return NULL; } if (!equate_lines(&self->hashtable, self->a, self->b, self->asize, self->bsize)) { Py_DECREF(self); return NULL; } if (self->bsize > 0) { self->backpointers = (Py_ssize_t *)guarded_malloc(sizeof(Py_ssize_t) * self->bsize * 4); if (self->backpointers == NULL) { Py_DECREF(self); PyErr_NoMemory(); return NULL; } } else { self->backpointers = NULL; } } return (PyObject *)self; } static void PatienceSequenceMatcher_dealloc(PatienceSequenceMatcher* self) { free(self->backpointers); free(self->hashtable.table); delete_lines(self->b, self->bsize); delete_lines(self->a, self->asize); self->ob_type->tp_free((PyObject *)self); } static char PatienceSequenceMatcher_get_matching_blocks_doc[] = "Return list of triples describing matching subsequences.\n" "\n" "Each triple is of the form (i, j, n), and means that\n" "a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in\n" "i and in j.\n" "\n" "The last triple is a dummy, (len(a), len(b), 0), and is the only\n" "triple with n==0.\n" "\n" ">>> s = PatienceSequenceMatcher(None, \"abxcd\", \"abcd\")\n" ">>> s.get_matching_blocks()\n" "[(0, 0, 2), (3, 2, 2), (5, 4, 0)]\n"; static PyObject * PatienceSequenceMatcher_get_matching_blocks(PatienceSequenceMatcher* self) { PyObject *answer, *item; int res; Py_ssize_t i; struct matching_blocks matches; matches.count = 0; if (self->bsize > 0) { matches.matches = (struct matching_block *) guarded_malloc(sizeof(struct matching_block) * self->bsize); if (matches.matches == NULL) return PyErr_NoMemory(); } else matches.matches = NULL; res = recurse_matches(&matches, &self->hashtable, self->backpointers, self->a, self->b, 0, 0, self->asize, self->bsize, 10); if (!res) { free(matches.matches); return PyErr_NoMemory(); } answer = PyList_New(matches.count + 1); if (answer == NULL) { free(matches.matches); return NULL; } for (i = 0; i < matches.count; i++) { #if PY_VERSION_HEX < 0x02050000 item = Py_BuildValue("iii", matches.matches[i].a, matches.matches[i].b, matches.matches[i].len); #else item = Py_BuildValue("nnn", matches.matches[i].a, matches.matches[i].b, matches.matches[i].len); #endif if (item == NULL) goto error; if (PyList_SetItem(answer, i, item) != 0) goto error; } #if PY_VERSION_HEX < 0x02050000 item = Py_BuildValue("iii", self->asize, self->bsize, 0); #else item = Py_BuildValue("nnn", self->asize, self->bsize, 0); #endif if (item == NULL) goto error; if (PyList_SetItem(answer, i, item) != 0) goto error; free(matches.matches); return answer; error: free(matches.matches); Py_DECREF(answer); return NULL; } static char PatienceSequenceMatcher_get_opcodes_doc[] = "Return list of 5-tuples describing how to turn a into b.\n" "\n" "Each tuple is of the form (tag, i1, i2, j1, j2). 
The first tuple\n" "has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the\n" "tuple preceding it, and likewise for j1 == the previous j2.\n" "\n" "The tags are strings, with these meanings:\n" "\n" "'replace': a[i1:i2] should be replaced by b[j1:j2]\n" "'delete': a[i1:i2] should be deleted.\n" " Note that j1==j2 in this case.\n" "'insert': b[j1:j2] should be inserted at a[i1:i1].\n" " Note that i1==i2 in this case.\n" "'equal': a[i1:i2] == b[j1:j2]\n" "\n" ">>> a = \"qabxcd\"\n" ">>> b = \"abycdf\"\n" ">>> s = PatienceSequenceMatcher(None, a, b)\n" ">>> for tag, i1, i2, j1, j2 in s.get_opcodes():\n" "... print (\"%7s a[%d:%d] (%s) b[%d:%d] (%s)\" %\n" "... (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2]))\n" " delete a[0:1] (q) b[0:0] ()\n" " equal a[1:3] (ab) b[0:2] (ab)\n" "replace a[3:4] (x) b[2:3] (y)\n" " equal a[4:6] (cd) b[3:5] (cd)\n" " insert a[6:6] () b[5:6] (f)\n"; static PyObject * PatienceSequenceMatcher_get_opcodes(PatienceSequenceMatcher* self) { PyObject *answer, *item; Py_ssize_t i, j, k, ai, bj; int tag, res; struct matching_blocks matches; matches.count = 0; matches.matches = (struct matching_block *)guarded_malloc(sizeof(struct matching_block) * (self->bsize + 1)); if (matches.matches == NULL) return PyErr_NoMemory(); res = recurse_matches(&matches, &self->hashtable, self->backpointers, self->a, self->b, 0, 0, self->asize, self->bsize, 10); if (!res) { free(matches.matches); return PyErr_NoMemory(); } matches.matches[matches.count].a = self->asize; matches.matches[matches.count].b = self->bsize; matches.matches[matches.count].len = 0; matches.count++; answer = PyList_New(0); if (answer == NULL) { free(matches.matches); return NULL; } i = j = 0; for (k = 0; k < matches.count; k++) { ai = matches.matches[k].a; bj = matches.matches[k].b; tag = -1; if (i < ai && j < bj) tag = OP_REPLACE; else if (i < ai) tag = OP_DELETE; else if (j < bj) tag = OP_INSERT; if (tag != -1) { #if PY_VERSION_HEX < 0x02050000 item = Py_BuildValue("siiii", opcode_names[tag], i, ai, j, bj); #else item = Py_BuildValue("snnnn", opcode_names[tag], i, ai, j, bj); #endif if (item == NULL) goto error; if (PyList_Append(answer, item) != 0) goto error; } i = ai + matches.matches[k].len; j = bj + matches.matches[k].len; if (matches.matches[k].len > 0) { #if PY_VERSION_HEX < 0x02050000 item = Py_BuildValue("siiii", opcode_names[OP_EQUAL], ai, i, bj, j); #else item = Py_BuildValue("snnnn", opcode_names[OP_EQUAL], ai, i, bj, j); #endif if (item == NULL) goto error; if (PyList_Append(answer, item) != 0) goto error; } } free(matches.matches); return answer; error: free(matches.matches); Py_DECREF(answer); return NULL; } static char PatienceSequenceMatcher_get_grouped_opcodes_doc[] = "Isolate change clusters by eliminating ranges with no changes.\n" "\n" "Return a list of groups with upto n lines of context.\n" "Each group is in the same format as returned by get_opcodes().\n" "\n" ">>> from pprint import pprint\n" ">>> a = map(str, range(1,40))\n" ">>> b = a[:]\n" ">>> b[8:8] = ['i'] # Make an insertion\n" ">>> b[20] += 'x' # Make a replacement\n" ">>> b[23:28] = [] # Make a deletion\n" ">>> b[30] += 'y' # Make another replacement\n" ">>> pprint(PatienceSequenceMatcher(None,a,b).get_grouped_opcodes())\n" "[[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)],\n" " [('equal', 16, 19, 17, 20),\n" " ('replace', 19, 20, 20, 21),\n" " ('equal', 20, 22, 21, 23),\n" " ('delete', 22, 27, 23, 23),\n" " ('equal', 27, 30, 23, 26)],\n" " [('equal', 31, 34, 27, 30),\n" " ('replace', 34, 35, 30, 
31),\n" " ('equal', 35, 38, 31, 34)]]\n"; static PyObject * PatienceSequenceMatcher_get_grouped_opcodes(PatienceSequenceMatcher* self, PyObject *args) { PyObject *answer, *group, *item; Py_ssize_t i, j, k, ai, bj, size, ncodes, tag; Py_ssize_t i1, i2, j1, j2; int n = 3, nn, res; struct matching_blocks matches; struct opcode *codes; if (!PyArg_ParseTuple(args, "|i", &n)) return NULL; matches.count = 0; matches.matches = (struct matching_block *)guarded_malloc(sizeof(struct matching_block) * (self->bsize + 1)); if (matches.matches == NULL) return PyErr_NoMemory(); res = recurse_matches(&matches, &self->hashtable, self->backpointers, self->a, self->b, 0, 0, self->asize, self->bsize, 10); if (!res) { free(matches.matches); return PyErr_NoMemory(); } matches.matches[matches.count].a = self->asize; matches.matches[matches.count].b = self->bsize; matches.matches[matches.count].len = 0; matches.count++; ncodes = 0; codes = (struct opcode *)guarded_malloc(sizeof(struct opcode) * matches.count * 2); if (codes == NULL) { free(matches.matches); return PyErr_NoMemory(); } i = j = 0; for (k = 0; k < matches.count; k++) { ai = matches.matches[k].a; bj = matches.matches[k].b; tag = -1; if (i < ai && j < bj) tag = OP_REPLACE; else if (i < ai) tag = OP_DELETE; else if (j < bj) tag = OP_INSERT; if (tag != -1) { codes[ncodes].tag = tag; codes[ncodes].i1 = i; codes[ncodes].i2 = ai; codes[ncodes].j1 = j; codes[ncodes].j2 = bj; ncodes++; } i = ai + matches.matches[k].len; j = bj + matches.matches[k].len; if (matches.matches[k].len > 0) { codes[ncodes].tag = OP_EQUAL; codes[ncodes].i1 = ai; codes[ncodes].i2 = i; codes[ncodes].j1 = bj; codes[ncodes].j2 = j; ncodes++; } } if (ncodes == 0) { codes[ncodes].tag = OP_EQUAL; codes[ncodes].i1 = 0; codes[ncodes].i2 = 1; codes[ncodes].j1 = 0; codes[ncodes].j2 = 1; ncodes++; } /* fixup leading and trailing groups if they show no changes. */ if (codes[0].tag == OP_EQUAL) { codes[0].i1 = MAX(codes[0].i1, codes[0].i2 - n); codes[0].j1 = MAX(codes[0].j1, codes[0].j2 - n); } if (codes[ncodes - 1].tag == OP_EQUAL) { codes[ncodes - 1].i2 = MIN(codes[ncodes - 1].i2, codes[ncodes - 1].i1 + n); codes[ncodes - 1].j2 = MIN(codes[ncodes - 1].j2, codes[ncodes - 1].j1 + n); } group = NULL; answer = PyList_New(0); if (answer == NULL) goto error; group = PyList_New(0); if (group == NULL) goto error; nn = n + n; tag = -1; for (i = 0; i < ncodes; i++) { tag = codes[i].tag; i1 = codes[i].i1; i2 = codes[i].i2; j1 = codes[i].j1; j2 = codes[i].j2; /* end the current group and start a new one whenever there is a large range with no changes. 
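An unchanged run longer than 2*n lines cannot provide context for both the change before it and the change after it, so the first n lines of the run close the current group and the last n lines open the next one.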
*/ if (tag == OP_EQUAL && i2 - i1 > nn) { #if PY_VERSION_HEX < 0x02050000 item = Py_BuildValue("siiii", opcode_names[tag], i1, MIN(i2, i1 + n), j1, MIN(j2, j1 + n)); #else item = Py_BuildValue("snnnn", opcode_names[tag], i1, MIN(i2, i1 + n), j1, MIN(j2, j1 + n)); #endif if (item == NULL) goto error; if (PyList_Append(group, item) != 0) goto error; if (PyList_Append(answer, group) != 0) goto error; group = PyList_New(0); if (group == NULL) goto error; i1 = MAX(i1, i2 - n); j1 = MAX(j1, j2 - n); } #if PY_VERSION_HEX < 0x02050000 item = Py_BuildValue("siiii", opcode_names[tag], i1, i2, j1 ,j2); #else item = Py_BuildValue("snnnn", opcode_names[tag], i1, i2, j1 ,j2); #endif if (item == NULL) goto error; if (PyList_Append(group, item) != 0) goto error; } size = PyList_Size(group); if (size > 0 && !(size == 1 && tag == OP_EQUAL)) { if (PyList_Append(answer, group) != 0) goto error; } else Py_DECREF(group); free(codes); free(matches.matches); return answer; error: free(codes); free(matches.matches); Py_DECREF(group); Py_DECREF(answer); return NULL; } static PyMethodDef PatienceSequenceMatcher_methods[] = { {"get_matching_blocks", (PyCFunction)PatienceSequenceMatcher_get_matching_blocks, METH_NOARGS, PatienceSequenceMatcher_get_matching_blocks_doc}, {"get_opcodes", (PyCFunction)PatienceSequenceMatcher_get_opcodes, METH_NOARGS, PatienceSequenceMatcher_get_opcodes_doc}, {"get_grouped_opcodes", (PyCFunction)PatienceSequenceMatcher_get_grouped_opcodes, METH_VARARGS, PatienceSequenceMatcher_get_grouped_opcodes_doc}, {NULL} }; static char PatienceSequenceMatcher_doc[] = "C implementation of PatienceSequenceMatcher"; static PyTypeObject PatienceSequenceMatcherType = { PyObject_HEAD_INIT(NULL) 0, /* ob_size */ "PatienceSequenceMatcher", /* tp_name */ sizeof(PatienceSequenceMatcher), /* tp_basicsize */ 0, /* tp_itemsize */ (destructor)PatienceSequenceMatcher_dealloc, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ 0, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT, /* tp_flags*/ PatienceSequenceMatcher_doc, /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ PatienceSequenceMatcher_methods, /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ 0, /* tp_init */ 0, /* tp_alloc */ PatienceSequenceMatcher_new, /* tp_new */ }; static PyMethodDef cpatiencediff_methods[] = { {"unique_lcs_c", py_unique_lcs, METH_VARARGS}, {"recurse_matches_c", py_recurse_matches, METH_VARARGS}, {NULL, NULL} }; PyMODINIT_FUNC init_patiencediff_c(void) { PyObject* m; if (PyType_Ready(&PatienceSequenceMatcherType) < 0) return; m = Py_InitModule3("_patiencediff_c", cpatiencediff_methods, "C implementation of PatienceSequenceMatcher"); if (m == NULL) return; Py_INCREF(&PatienceSequenceMatcherType); PyModule_AddObject(m, "PatienceSequenceMatcher_c", (PyObject *)&PatienceSequenceMatcherType); } /* vim: sw=4 et */ bzr-2.7.0/bzrlib/_patiencediff_py.py0000755000000000000000000002173511673403246015612 0ustar 00000000000000#!/usr/bin/env python # Copyright (C) 2005 Bram Cohen, Copyright (C) 2005, 2006 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as 
published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import from bisect import bisect import difflib from bzrlib.trace import mutter __all__ = ['PatienceSequenceMatcher', 'unified_diff', 'unified_diff_files'] def unique_lcs_py(a, b): """Find the longest common subset for unique lines. :param a: An indexable object (such as string or list of strings) :param b: Another indexable object (such as string or list of strings) :return: A list of tuples, one for each line which is matched. [(line_in_a, line_in_b), ...] This only matches lines which are unique on both sides. This helps prevent common lines from over influencing match results. The longest common subset uses the Patience Sorting algorithm: http://en.wikipedia.org/wiki/Patience_sorting """ # set index[line in a] = position of line in a unless # a is a duplicate, in which case it's set to None index = {} for i in xrange(len(a)): line = a[i] if line in index: index[line] = None else: index[line]= i # make btoa[i] = position of line i in a, unless # that line doesn't occur exactly once in both, # in which case it's set to None btoa = [None] * len(b) index2 = {} for pos, line in enumerate(b): next = index.get(line) if next is not None: if line in index2: # unset the previous mapping, which we now know to # be invalid because the line isn't unique btoa[index2[line]] = None del index[line] else: index2[line] = pos btoa[pos] = next # this is the Patience sorting algorithm # see http://en.wikipedia.org/wiki/Patience_sorting backpointers = [None] * len(b) stacks = [] lasts = [] k = 0 for bpos, apos in enumerate(btoa): if apos is None: continue # as an optimization, check if the next line comes at the end, # because it usually does if stacks and stacks[-1] < apos: k = len(stacks) # as an optimization, check if the next line comes right after # the previous line, because usually it does elif stacks and stacks[k] < apos and (k == len(stacks) - 1 or stacks[k+1] > apos): k += 1 else: k = bisect(stacks, apos) if k > 0: backpointers[bpos] = lasts[k-1] if k < len(stacks): stacks[k] = apos lasts[k] = bpos else: stacks.append(apos) lasts.append(bpos) if len(lasts) == 0: return [] result = [] k = lasts[-1] while k is not None: result.append((btoa[k], k)) k = backpointers[k] result.reverse() return result def recurse_matches_py(a, b, alo, blo, ahi, bhi, answer, maxrecursion): """Find all of the matching text in the lines of a and b. :param a: A sequence :param b: Another sequence :param alo: The start location of a to check, typically 0 :param ahi: The start location of b to check, typically 0 :param ahi: The maximum length of a to check, typically len(a) :param bhi: The maximum length of b to check, typically len(b) :param answer: The return array. Will be filled with tuples indicating [(line_in_a, line_in_b)] :param maxrecursion: The maximum depth to recurse. Must be a positive integer. 
:return: None, the return value is in the parameter answer, which should be a list """ if maxrecursion < 0: mutter('max recursion depth reached') # this will never happen normally, this check is to prevent DOS attacks return oldlength = len(answer) if alo == ahi or blo == bhi: return last_a_pos = alo-1 last_b_pos = blo-1 for apos, bpos in unique_lcs_py(a[alo:ahi], b[blo:bhi]): # recurse between lines which are unique in each file and match apos += alo bpos += blo # Most of the time, you will have a sequence of similar entries if last_a_pos+1 != apos or last_b_pos+1 != bpos: recurse_matches_py(a, b, last_a_pos+1, last_b_pos+1, apos, bpos, answer, maxrecursion - 1) last_a_pos = apos last_b_pos = bpos answer.append((apos, bpos)) if len(answer) > oldlength: # find matches between the last match and the end recurse_matches_py(a, b, last_a_pos+1, last_b_pos+1, ahi, bhi, answer, maxrecursion - 1) elif a[alo] == b[blo]: # find matching lines at the very beginning while alo < ahi and blo < bhi and a[alo] == b[blo]: answer.append((alo, blo)) alo += 1 blo += 1 recurse_matches_py(a, b, alo, blo, ahi, bhi, answer, maxrecursion - 1) elif a[ahi - 1] == b[bhi - 1]: # find matching lines at the very end nahi = ahi - 1 nbhi = bhi - 1 while nahi > alo and nbhi > blo and a[nahi - 1] == b[nbhi - 1]: nahi -= 1 nbhi -= 1 recurse_matches_py(a, b, last_a_pos+1, last_b_pos+1, nahi, nbhi, answer, maxrecursion - 1) for i in xrange(ahi - nahi): answer.append((nahi + i, nbhi + i)) def _collapse_sequences(matches): """Find sequences of lines. Given a sequence of [(line_in_a, line_in_b),] find regions where they both increment at the same time """ answer = [] start_a = start_b = None length = 0 for i_a, i_b in matches: if (start_a is not None and (i_a == start_a + length) and (i_b == start_b + length)): length += 1 else: if start_a is not None: answer.append((start_a, start_b, length)) start_a = i_a start_b = i_b length = 1 if length != 0: answer.append((start_a, start_b, length)) return answer def _check_consistency(answer): # For consistency sake, make sure all matches are only increasing next_a = -1 next_b = -1 for (a, b, match_len) in answer: if a < next_a: raise ValueError('Non increasing matches for a') if b < next_b: raise ValueError('Non increasing matches for b') next_a = a + match_len next_b = b + match_len class PatienceSequenceMatcher_py(difflib.SequenceMatcher): """Compare a pair of sequences using longest common subset.""" _do_check_consistency = True def __init__(self, isjunk=None, a='', b=''): if isjunk is not None: raise NotImplementedError('Currently we do not support' ' isjunk for sequence matching') difflib.SequenceMatcher.__init__(self, isjunk, a, b) def get_matching_blocks(self): """Return list of triples describing matching subsequences. Each triple is of the form (i, j, n), and means that a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in i and in j. The last triple is a dummy, (len(a), len(b), 0), and is the only triple with n==0. >>> s = PatienceSequenceMatcher(None, "abxcd", "abcd") >>> s.get_matching_blocks() [(0, 0, 2), (3, 2, 2), (5, 4, 0)] """ # jam 20060525 This is the python 2.4.1 difflib get_matching_blocks # implementation which uses __helper. 2.4.3 got rid of helper for # doing it inline with a queue. 
# We should consider doing the same for recurse_matches if self.matching_blocks is not None: return self.matching_blocks matches = [] recurse_matches_py(self.a, self.b, 0, 0, len(self.a), len(self.b), matches, 10) # Matches now has individual line pairs of # line A matches line B, at the given offsets self.matching_blocks = _collapse_sequences(matches) self.matching_blocks.append( (len(self.a), len(self.b), 0) ) if PatienceSequenceMatcher_py._do_check_consistency: if __debug__: _check_consistency(self.matching_blocks) return self.matching_blocks bzr-2.7.0/bzrlib/_readdir_py.py0000644000000000000000000000300711673635356014601 0ustar 00000000000000# Copyright (C) 2006, 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Python implementation of readdir interface.""" from __future__ import absolute_import import stat _directory = 'directory' _chardev = 'chardev' _block = 'block' _file = 'file' _fifo = 'fifo' _symlink = 'symlink' _socket = 'socket' _unknown = 'unknown' _formats = { stat.S_IFDIR:'directory', stat.S_IFCHR:'chardev', stat.S_IFBLK:'block', stat.S_IFREG:'file', stat.S_IFIFO:'fifo', stat.S_IFLNK:'symlink', stat.S_IFSOCK:'socket', } def _kind_from_mode(stat_mode, _formats=_formats, _unknown='unknown'): """Generate a file kind from a stat mode. This is used in walkdirs. It's performance is critical: Do not mutate without careful benchmarking. """ try: return _formats[stat_mode & 0170000] except KeyError: return _unknown bzr-2.7.0/bzrlib/_readdir_pyx.pyx0000644000000000000000000003014711375506163015156 0ustar 00000000000000# Copyright (C) 2006, 2008, 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Wrapper for readdir which returns files ordered by inode.""" import os import sys #python2.4 support cdef extern from "python-compat.h": pass cdef extern from 'errno.h': int ENOENT int ENOTDIR int EAGAIN int EINTR char *strerror(int errno) # not necessarily a real variable, but this should be close enough int errno cdef extern from 'unistd.h': int chdir(char *path) int close(int fd) int fchdir(int fd) char *getcwd(char *, int size) cdef extern from 'stdlib.h': void *malloc(int) void free(void *) cdef extern from 'sys/types.h': ctypedef long ssize_t ctypedef unsigned long size_t ctypedef long time_t ctypedef unsigned long ino_t ctypedef unsigned long long off_t ctypedef int mode_t cdef extern from 'sys/stat.h': cdef struct stat: int st_mode off_t st_size int st_dev ino_t st_ino int st_mtime int st_ctime int lstat(char *path, stat *buf) int S_ISDIR(int mode) int S_ISCHR(int mode) int S_ISBLK(int mode) int S_ISREG(int mode) int S_ISFIFO(int mode) int S_ISLNK(int mode) int S_ISSOCK(int mode) cdef extern from 'fcntl.h': int O_RDONLY int open(char *pathname, int flags, mode_t mode) cdef extern from 'Python.h': int PyErr_CheckSignals() except -1 char * PyString_AS_STRING(object) ctypedef int Py_ssize_t # Required for older pyrex versions ctypedef struct PyObject: pass Py_ssize_t PyString_Size(object s) object PyList_GetItem(object lst, Py_ssize_t index) void *PyList_GetItem_object_void "PyList_GET_ITEM" (object lst, int index) int PyList_Append(object lst, object item) except -1 void *PyTuple_GetItem_void_void "PyTuple_GET_ITEM" (void* tpl, int index) int PyTuple_SetItem(void *, Py_ssize_t pos, object item) except -1 int PyTuple_SetItem_obj "PyTuple_SetItem" (void *, Py_ssize_t pos, PyObject * item) except -1 void Py_INCREF(object o) void Py_DECREF(object o) void PyString_Concat(PyObject **string, object newpart) cdef extern from 'dirent.h': ctypedef struct dirent: char d_name[256] ino_t d_ino # the opaque C library DIR type. ctypedef struct DIR # should be DIR *, pyrex barfs. DIR * opendir(char * name) int closedir(DIR * dir) dirent *readdir(DIR *dir) cdef object _directory _directory = 'directory' cdef object _chardev _chardev = 'chardev' cdef object _block _block = 'block' cdef object _file _file = 'file' cdef object _fifo _fifo = 'fifo' cdef object _symlink _symlink = 'symlink' cdef object _socket _socket = 'socket' cdef object _unknown _unknown = 'unknown' # add a typedef struct dirent dirent to workaround pyrex cdef extern from 'readdir.h': pass cdef class _Stat: """Represent a 'stat' result.""" cdef stat _st property st_dev: def __get__(self): return self._st.st_dev property st_ino: def __get__(self): return self._st.st_ino property st_mode: def __get__(self): return self._st.st_mode property st_ctime: def __get__(self): return self._st.st_ctime property st_mtime: def __get__(self): return self._st.st_mtime property st_size: def __get__(self): return self._st.st_size def __repr__(self): """Repr is the same as a Stat object. 
(mode, ino, dev, nlink, uid, gid, size, None(atime), mtime, ctime) """ return repr((self.st_mode, 0, 0, 0, 0, 0, self.st_size, None, self.st_mtime, self.st_ctime)) from bzrlib import osutils cdef object _safe_utf8 _safe_utf8 = osutils.safe_utf8 cdef class UTF8DirReader: """A dir reader for utf8 file systems.""" def kind_from_mode(self, int mode): """Get the kind of a path from a mode status.""" return self._kind_from_mode(mode) cdef _kind_from_mode(self, int mode): # Files and directories are the most common - check them first. if S_ISREG(mode): return _file if S_ISDIR(mode): return _directory if S_ISCHR(mode): return _chardev if S_ISBLK(mode): return _block if S_ISLNK(mode): return _symlink if S_ISFIFO(mode): return _fifo if S_ISSOCK(mode): return _socket return _unknown def top_prefix_to_starting_dir(self, top, prefix=""): """See DirReader.top_prefix_to_starting_dir.""" return (_safe_utf8(prefix), None, None, None, _safe_utf8(top)) def read_dir(self, prefix, top): """Read a single directory from a utf8 file system. All paths in and out are utf8. This sub-function is called when we know the filesystem is already in utf8 encoding. So we don't need to transcode filenames. See DirReader.read_dir for details. """ #cdef char *_prefix = prefix #cdef char *_top = top # Use C accelerated directory listing. cdef object newval cdef int index cdef int length cdef void * atuple cdef object name cdef PyObject * new_val_obj if PyString_Size(prefix): relprefix = prefix + '/' else: relprefix = '' top_slash = top + '/' # read_dir supplies in should-stat order. # for _, name in sorted(_listdir(top)): result = _read_dir(top) length = len(result) # result.sort() for index from 0 <= index < length: atuple = PyList_GetItem_object_void(result, index) name = PyTuple_GetItem_void_void(atuple, 1) # We have a tuple with (inode, name, None, statvalue, None) # Now edit it: # inode -> path_from_top # direct concat - faster than operator +. new_val_obj = relprefix Py_INCREF(relprefix) PyString_Concat(&new_val_obj, name) if NULL == new_val_obj: # PyString_Concat will have setup an exception, but how to get # at it? raise Exception("failed to strcat") PyTuple_SetItem_obj(atuple, 0, new_val_obj) # 1st None -> kind newval = self._kind_from_mode( (<_Stat>PyTuple_GetItem_void_void(atuple, 3)).st_mode) Py_INCREF(newval) PyTuple_SetItem(atuple, 2, newval) # 2nd None -> abspath # for all - the caller may need to stat files # etc. # direct concat - faster than operator +. new_val_obj = top_slash Py_INCREF(top_slash) PyString_Concat(&new_val_obj, name) if NULL == new_val_obj: # PyString_Concat will have setup an exception, but how to get # at it? raise Exception("failed to strcat") PyTuple_SetItem_obj(atuple, 4, new_val_obj) return result cdef raise_os_error(int errnum, char *msg_prefix, path): if errnum == EINTR: PyErr_CheckSignals() raise OSError(errnum, msg_prefix + strerror(errnum), path) cdef _read_dir(path): """Like os.listdir, this reads the contents of a directory. :param path: the directory to list. :return: a list of single-owner (the list) tuples ready for editing into the result tuples walkdirs needs to yield. They contain (inode, name, None, statvalue, None). """ cdef DIR *the_dir # currently this needs a fixup - the C code says 'dirent' but should say # 'struct dirent' cdef dirent * entry cdef dirent sentinel cdef char *name cdef int stat_result cdef _Stat statvalue global errno cdef int orig_dir_fd # Avoid chdir('') because it causes problems on Sun OS, and avoid this if # staying in . 
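    # The original working directory is remembered as an open file descriptor
    # (orig_dir_fd) and restored with fchdir() in the finally block below.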
if path != "" and path != '.': # we change into the requested directory before reading, and back at the # end, because that turns out to make the stat calls measurably faster than # passing full paths every time. orig_dir_fd = open(".", O_RDONLY, 0) if orig_dir_fd == -1: raise_os_error(errno, "open: ", ".") if -1 == chdir(path): # Ignore the return value, because we are already raising an # exception close(orig_dir_fd) raise_os_error(errno, "chdir: ", path) else: orig_dir_fd = -1 try: the_dir = opendir(".") if NULL == the_dir: raise_os_error(errno, "opendir: ", path) try: result = [] entry = &sentinel while entry != NULL: # Unlike most libc functions, readdir needs errno set to 0 # beforehand so that eof can be distinguished from errors. See # while True: errno = 0 entry = readdir(the_dir) if entry == NULL and (errno == EAGAIN or errno == EINTR): if errno == EINTR: PyErr_CheckSignals() # try again continue else: break if entry == NULL: if errno == ENOTDIR or errno == 0: # We see ENOTDIR at the end of a normal directory. # As ENOTDIR for read_dir(file) is triggered on opendir, # we consider ENOTDIR to be 'no error'. continue else: raise_os_error(errno, "readdir: ", path) name = entry.d_name if not (name[0] == c"." and ( (name[1] == 0) or (name[1] == c"." and name[2] == 0)) ): statvalue = _Stat() stat_result = lstat(entry.d_name, &statvalue._st) if stat_result != 0: if errno != ENOENT: raise_os_error(errno, "lstat: ", path + "/" + entry.d_name) else: # the file seems to have disappeared after being # seen by readdir - perhaps a transient temporary # file. there's no point returning it. continue # We append a 5-tuple that can be modified in-place by the C # api: # inode to sort on (to replace with top_path) # name (to keep) # kind (None, to set) # statvalue (to keep) # abspath (None, to set) PyList_Append(result, (entry.d_ino, entry.d_name, None, statvalue, None)) finally: if -1 == closedir(the_dir): raise_os_error(errno, "closedir: ", path) finally: if -1 != orig_dir_fd: failed = False if -1 == fchdir(orig_dir_fd): # try to close the original directory anyhow failed = True if -1 == close(orig_dir_fd) or failed: raise_os_error(errno, "return to orig_dir: ", "") return result # vim: tw=79 ai expandtab sw=4 sts=4 bzr-2.7.0/bzrlib/_rio_py.py0000644000000000000000000000521711673635356013765 0ustar 00000000000000# Copyright (C) 2009 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Python implementation of _read_stanza_*.""" from __future__ import absolute_import import re from bzrlib.rio import ( Stanza, ) _tag_re = re.compile(r'^[-a-zA-Z0-9_]+$') def _valid_tag(tag): if type(tag) != str: raise TypeError(tag) return bool(_tag_re.match(tag)) def _read_stanza_utf8(line_iter): def iter_unicode_lines(): for line in line_iter: if type(line) != str: raise TypeError(line) yield line.decode('utf-8') return _read_stanza_unicode(iter_unicode_lines()) def _read_stanza_unicode(unicode_iter): stanza = Stanza() tag = None accum_value = None # TODO: jam 20060922 This code should raise real errors rather than # using 'assert' to process user input, or raising ValueError # rather than a more specific error. for line in unicode_iter: if line is None or line == u'': break # end of file if line == u'\n': break # end of stanza real_l = line if line[0] == u'\t': # continues previous value if tag is None: raise ValueError('invalid continuation line %r' % real_l) accum_value.append(u'\n' + line[1:-1]) else: # new tag:value line if tag is not None: stanza.add(tag, u''.join(accum_value)) try: colon_index = line.index(u': ') except ValueError: raise ValueError('tag/value separator not found in line %r' % real_l) tag = str(line[:colon_index]) if not _valid_tag(tag): raise ValueError("invalid rio tag %r" % (tag,)) accum_value = [line[colon_index+2:-1]] if tag is not None: # add last tag-value stanza.add(tag, u''.join(accum_value)) return stanza else: # didn't see any content return None bzr-2.7.0/bzrlib/_rio_pyx.pyx0000644000000000000000000002006411337021464014324 0ustar 00000000000000# Copyright (C) 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Pyrex implementation of _read_stanza_*.""" #python2.4 support cdef extern from "python-compat.h": pass cdef extern from "stdlib.h": void *malloc(int) void *realloc(void *, int) void free(void *) cdef extern from "Python.h": ctypedef int Py_ssize_t # Required for older pyrex versions ctypedef int Py_UNICODE char *PyString_AS_STRING(object s) Py_ssize_t PyString_GET_SIZE(object t) except -1 object PyUnicode_DecodeUTF8(char *string, Py_ssize_t length, char *errors) object PyString_FromStringAndSize(char *s, Py_ssize_t len) int PyString_CheckExact(object) int PyUnicode_CheckExact(object) object PyUnicode_Join(object, object) object PyUnicode_EncodeASCII(Py_UNICODE *, int, char *) Py_UNICODE *PyUnicode_AS_UNICODE(object) Py_UNICODE *PyUnicode_AsUnicode(object) Py_ssize_t PyUnicode_GET_SIZE(object) except -1 int PyList_Append(object, object) except -1 int Py_UNICODE_ISLINEBREAK(Py_UNICODE) object PyUnicode_FromUnicode(Py_UNICODE *, int) void *Py_UNICODE_COPY(Py_UNICODE *, Py_UNICODE *, int) cdef extern from "string.h": void *memcpy(void *, void *, int) from bzrlib.rio import Stanza cdef int _valid_tag_char(char c): # cannot_raise return (c == c'_' or c == c'-' or (c >= c'a' and c <= c'z') or (c >= c'A' and c <= c'Z') or (c >= c'0' and c <= c'9')) def _valid_tag(tag): cdef char *c_tag cdef Py_ssize_t c_len cdef int i if not PyString_CheckExact(tag): raise TypeError(tag) c_tag = PyString_AS_STRING(tag) c_len = PyString_GET_SIZE(tag) if c_len < 1: return False for i from 0 <= i < c_len: if not _valid_tag_char(c_tag[i]): return False return True cdef object _split_first_line_utf8(char *line, int len, char *value, Py_ssize_t *value_len): cdef int i for i from 0 <= i < len: if line[i] == c':': if line[i+1] != c' ': raise ValueError("invalid tag in line %r" % line) memcpy(value, line+i+2, len-i-2) value_len[0] = len-i-2 return PyString_FromStringAndSize(line, i) raise ValueError('tag/value separator not found in line %r' % line) cdef object _split_first_line_unicode(Py_UNICODE *line, int len, Py_UNICODE *value, Py_ssize_t *value_len): cdef int i for i from 0 <= i < len: if line[i] == c':': if line[i+1] != c' ': raise ValueError("invalid tag in line %r" % PyUnicode_FromUnicode(line, len)) memcpy(value, &line[i+2], (len-i-2) * sizeof(Py_UNICODE)) value_len[0] = len-i-2 return PyUnicode_EncodeASCII(line, i, "strict") raise ValueError("tag/value separator not found in line %r" % PyUnicode_FromUnicode(line, len)) def _read_stanza_utf8(line_iter): cdef char *c_line cdef Py_ssize_t c_len cdef char *accum_value, *new_accum_value cdef Py_ssize_t accum_len, accum_size pairs = [] tag = None accum_len = 0 accum_size = 4096 accum_value = malloc(accum_size) if accum_value == NULL: raise MemoryError try: for line in line_iter: if line is None: break # end of file if not PyString_CheckExact(line): raise TypeError("%r is not a plain string" % line) c_line = PyString_AS_STRING(line) c_len = PyString_GET_SIZE(line) if c_len < 1: break # end of file if c_len == 1 and c_line[0] == c"\n": break # end of stanza if accum_len + c_len > accum_size: accum_size = (accum_len + c_len) new_accum_value = realloc(accum_value, accum_size) if new_accum_value == NULL: raise MemoryError else: accum_value = new_accum_value if c_line[0] == c'\t': # continues previous value if tag is None: raise ValueError('invalid continuation 
line %r' % line) memcpy(accum_value+accum_len, c_line+1, c_len-1) accum_len = accum_len + c_len-1 else: # new tag:value line if tag is not None: PyList_Append(pairs, (tag, PyUnicode_DecodeUTF8(accum_value, accum_len-1, "strict"))) tag = _split_first_line_utf8(c_line, c_len, accum_value, &accum_len) if not _valid_tag(tag): raise ValueError("invalid rio tag %r" % (tag,)) if tag is not None: # add last tag-value PyList_Append(pairs, (tag, PyUnicode_DecodeUTF8(accum_value, accum_len-1, "strict"))) return Stanza.from_pairs(pairs) else: # didn't see any content return None finally: free(accum_value) def _read_stanza_unicode(unicode_iter): cdef Py_UNICODE *c_line cdef int c_len cdef Py_UNICODE *accum_value, *new_accum_value cdef Py_ssize_t accum_len, accum_size pairs = [] tag = None accum_len = 0 accum_size = 4096 accum_value = malloc(accum_size*sizeof(Py_UNICODE)) if accum_value == NULL: raise MemoryError try: for line in unicode_iter: if line is None: break # end of file if not PyUnicode_CheckExact(line): raise TypeError("%r is not a unicode string" % line) c_line = PyUnicode_AS_UNICODE(line) c_len = PyUnicode_GET_SIZE(line) if c_len < 1: break # end of file if Py_UNICODE_ISLINEBREAK(c_line[0]): break # end of stanza if accum_len + c_len > accum_size: accum_size = accum_len + c_len new_accum_value = realloc(accum_value, accum_size*sizeof(Py_UNICODE)) if new_accum_value == NULL: raise MemoryError else: accum_value = new_accum_value if c_line[0] == c'\t': # continues previous value, if tag is None: raise ValueError('invalid continuation line %r' % line) memcpy(&accum_value[accum_len], &c_line[1], (c_len-1)*sizeof(Py_UNICODE)) accum_len = accum_len + (c_len-1) else: # new tag:value line if tag is not None: PyList_Append(pairs, (tag, PyUnicode_FromUnicode(accum_value, accum_len-1))) tag = _split_first_line_unicode(c_line, c_len, accum_value, &accum_len) if not _valid_tag(tag): raise ValueError("invalid rio tag %r" % (tag,)) if tag is not None: # add last tag-value PyList_Append(pairs, (tag, PyUnicode_FromUnicode(accum_value, accum_len-1))) return Stanza.from_pairs(pairs) else: # didn't see any content return None finally: free(accum_value) bzr-2.7.0/bzrlib/_simple_set_pyx.pxd0000644000000000000000000001047411337021464015656 0ustar 00000000000000# Copyright (C) 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Interface definition of a class like PySet but without caching the hash. This is generally useful when you want to 'intern' objects, etc. Note that this differs from Set in that we: 1) Don't have all of the .intersection, .difference, etc functions 2) Do return the object from the set via queries eg. 
SimpleSet.add(key) => saved_key and SimpleSet[key] => saved_key """ cdef extern from "Python.h": ctypedef struct PyObject: pass cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]: """A class similar to PySet, but with simpler implementation. The main advantage is that this class uses only 2N memory to store N objects rather than 4N memory. The main trade-off is that we do not cache the hash value of saved objects. As such, it is assumed that computing the hash will be cheap (such as strings or tuples of strings, etc.) This also differs in that you can get back the objects that are stored (like a dict), but we also don't implement the complete list of 'set' operations (difference, intersection, etc). """ # Data structure definition: # This is a basic hash table using open addressing. # http://en.wikipedia.org/wiki/Open_addressing # Basically that means we keep an array of pointers to Python objects # (called a table). Each location in the array is called a 'slot'. # # An empty slot holds a NULL pointer, a slot where there was an item # which was then deleted will hold a pointer to _dummy, and a filled slot # points at the actual object which fills that slot. # # The table is always a power of two, and the default location where an # object is inserted is at hash(object) & (table_size - 1) # # If there is a collision, then we search for another location. The # specific algorithm is in _lookup. We search until we: # find the object # find an equivalent object (by tp_richcompare(obj1, obj2, Py_EQ)) # find a NULL slot # # When an object is deleted, we set its slot to _dummy. this way we don't # have to track whether there was a collision, and find the corresponding # keys. (The collision resolution algorithm makes that nearly impossible # anyway, because it depends on the upper bits of the hash.) # The main effect of this, is that if we find _dummy, then we can insert # an object there, but we have to keep searching until we find NULL to # know that the object is not present elsewhere. cdef Py_ssize_t _used # active cdef Py_ssize_t _fill # active + dummy cdef Py_ssize_t _mask # Table contains (mask+1) slots, a power of 2 cdef PyObject **_table # Pyrex/Cython doesn't support arrays to 'object' # so we manage it manually cdef PyObject *_get(self, object key) except? NULL cdef object _add(self, key) cdef int _discard(self, key) except -1 cdef int _insert_clean(self, PyObject *key) except -1 cdef Py_ssize_t _resize(self, Py_ssize_t min_unused) except -1 # TODO: might want to export the C api here, though it is all available from # the class object... cdef api SimpleSet SimpleSet_New() cdef api object SimpleSet_Add(object self, object key) cdef api int SimpleSet_Contains(object self, object key) except -1 cdef api int SimpleSet_Discard(object self, object key) except -1 cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL cdef api Py_ssize_t SimpleSet_Size(object self) except -1 cdef api int SimpleSet_Next(object self, Py_ssize_t *pos, PyObject **key) except -1 bzr-2.7.0/bzrlib/_simple_set_pyx.pyx0000644000000000000000000005137511425576534015723 0ustar 00000000000000# Copyright (C) 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Definition of a class that is similar to Set with some small changes.""" cdef extern from "python-compat.h": pass cdef extern from "Python.h": ctypedef unsigned long size_t ctypedef long (*hashfunc)(PyObject*) except -1 ctypedef object (*richcmpfunc)(PyObject *, PyObject *, int) ctypedef int (*visitproc)(PyObject *, void *) ctypedef int (*traverseproc)(PyObject *, visitproc, void *) int Py_EQ void Py_INCREF(PyObject *) void Py_DECREF(PyObject *) ctypedef struct PyTypeObject: hashfunc tp_hash richcmpfunc tp_richcompare traverseproc tp_traverse PyTypeObject *Py_TYPE(PyObject *) # Note: *Don't* use hash(), Pyrex 0.9.8.5 thinks it returns an 'int', and # thus silently truncates to 32-bits on 64-bit machines. long PyObject_Hash(PyObject *) except -1 void *PyMem_Malloc(size_t nbytes) void PyMem_Free(void *) void memset(void *, int, size_t) # Dummy is an object used to mark nodes that have been deleted. Since # collisions require us to move a node to an alternative location, if we just # set an entry to NULL on delete, we won't find any relocated nodes. # We have to use _dummy_obj because we need to keep a refcount to it, but we # also use _dummy as a pointer, because it avoids having to put all # over the code base. cdef object _dummy_obj cdef PyObject *_dummy _dummy_obj = object() _dummy = _dummy_obj cdef object _NotImplemented _NotImplemented = NotImplemented cdef int _is_equal(PyObject *this, long this_hash, PyObject *other) except -1: cdef long other_hash if this == other: return 1 other_hash = PyObject_Hash(other) if other_hash != this_hash: return 0 # This implements a subset of the PyObject_RichCompareBool functionality. # Namely it: # 1) Doesn't try to do anything with old-style classes # 2) Assumes that both objects have a tp_richcompare implementation, and # that if that is not enough to compare equal, then they are not # equal. (It doesn't try to cast them both to some intermediate form # that would compare equal.) res = Py_TYPE(this).tp_richcompare(this, other, Py_EQ) if res is _NotImplemented: res = Py_TYPE(other).tp_richcompare(other, this, Py_EQ) if res is _NotImplemented: return 0 if res: return 1 return 0 cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]: """This class can be used to track canonical forms for objects. It is similar in function to the interned dictionary that is used by strings. However: 1) It assumes that hash(obj) is cheap, so does not need to inline a copy of it 2) It only stores one reference to the object, rather than 2 (key vs key:value) As such, it uses 1/3rd the amount of memory to store a pointer to the interned object. """ # Attributes are defined in the .pxd file DEF DEFAULT_SIZE=1024 def __init__(self): cdef Py_ssize_t size, n_bytes size = DEFAULT_SIZE self._mask = size - 1 self._used = 0 self._fill = 0 n_bytes = sizeof(PyObject*) * size; self._table = PyMem_Malloc(n_bytes) if self._table == NULL: raise MemoryError() memset(self._table, 0, n_bytes) def __sizeof__(self): # Note: Pyrex doesn't allow sizeof(class) so we re-implement it here. 
# Bits are: # 1: PyObject # 2: vtable * # 3: 3 Py_ssize_t # 4: PyObject** # Note that we might get alignment, etc, wrong, but at least this is # better than no estimate at all # return sizeof(SimpleSet) + (self._mask + 1) * (sizeof(PyObject*)) return (sizeof(PyObject) + sizeof(void*) + 3*sizeof(Py_ssize_t) + sizeof(PyObject**) + (self._mask + 1) * sizeof(PyObject*)) def __dealloc__(self): if self._table != NULL: PyMem_Free(self._table) self._table = NULL property used: def __get__(self): return self._used property fill: def __get__(self): return self._fill property mask: def __get__(self): return self._mask def _memory_size(self): """Return the number of bytes of memory consumed by this class.""" return sizeof(self) + (sizeof(PyObject*)*(self._mask + 1)) def __len__(self): return self._used def _test_lookup(self, key): cdef PyObject **slot slot = _lookup(self, key) if slot[0] == NULL: res = '' elif slot[0] == _dummy: res = '' else: res = slot[0] return (slot - self._table), res def __contains__(self, key): """Is key present in this SimpleSet.""" cdef PyObject **slot slot = _lookup(self, key) if slot[0] == NULL or slot[0] == _dummy: return False return True cdef PyObject *_get(self, object key) except? NULL: """Return the object (or nothing) define at the given location.""" cdef PyObject **slot slot = _lookup(self, key) if slot[0] == NULL or slot[0] == _dummy: return NULL return slot[0] def __getitem__(self, key): """Return a stored item that is equivalent to key.""" cdef PyObject *py_val py_val = self._get(key) if py_val == NULL: raise KeyError("Key %s is not present" % key) val = (py_val) return val cdef int _insert_clean(self, PyObject *key) except -1: """Insert a key into self.table. This is only meant to be used during times like '_resize', as it makes a lot of assuptions about keys not already being present, and there being no dummy entries. """ cdef size_t i, n_lookup cdef long the_hash cdef PyObject **table, **slot cdef Py_ssize_t mask mask = self._mask table = self._table the_hash = PyObject_Hash(key) i = the_hash for n_lookup from 0 <= n_lookup <= mask: # Don't loop forever slot = &table[i & mask] if slot[0] == NULL: slot[0] = key self._fill = self._fill + 1 self._used = self._used + 1 return 1 i = i + 1 + n_lookup raise RuntimeError('ran out of slots.') def _py_resize(self, min_used): """Do not use this directly, it is only exposed for testing.""" return self._resize(min_used) cdef Py_ssize_t _resize(self, Py_ssize_t min_used) except -1: """Resize the internal table. The final table will be big enough to hold at least min_used entries. We will copy the data from the existing table over, leaving out dummy entries. 
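        For example (a sketch only, using the module's DEFAULT_SIZE of 1024
        and the test-only _py_resize wrapper defined above)::

            s = SimpleSet()
            s._py_resize(1500)   # doubles until the table exceeds min_used -> 2048
            s._py_resize(300)    # stays at the 1024-slot minimum -> 1024
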
:return: The new size of the internal table """ cdef Py_ssize_t new_size, n_bytes, remaining cdef PyObject **new_table, **old_table, **slot new_size = DEFAULT_SIZE while new_size <= min_used and new_size > 0: new_size = new_size << 1 # We rolled over our signed size field if new_size <= 0: raise MemoryError() # Even if min_used == self._mask + 1, and we aren't changing the actual # size, we will still run the algorithm so that dummy entries are # removed # TODO: Test this # if new_size < self._used: # raise RuntimeError('cannot shrink SimpleSet to something' # ' smaller than the number of used slots.') n_bytes = sizeof(PyObject*) * new_size; new_table = PyMem_Malloc(n_bytes) if new_table == NULL: raise MemoryError() old_table = self._table self._table = new_table memset(self._table, 0, n_bytes) self._mask = new_size - 1 self._used = 0 remaining = self._fill self._fill = 0 # Moving everything to the other table is refcount neutral, so we don't # worry about it. slot = old_table while remaining > 0: if slot[0] == NULL: # unused slot pass elif slot[0] == _dummy: # dummy slot remaining = remaining - 1 else: # active slot remaining = remaining - 1 self._insert_clean(slot[0]) slot = slot + 1 PyMem_Free(old_table) return new_size def add(self, key): """Similar to set.add(), start tracking this key. There is one small difference, which is that we return the object that is stored at the given location. (which is closer to the dict.setdefault() functionality.) """ return self._add(key) cdef object _add(self, key): cdef PyObject **slot, *py_key cdef int added py_key = key if (Py_TYPE(py_key).tp_richcompare == NULL or Py_TYPE(py_key).tp_hash == NULL): raise TypeError('Types added to SimpleSet must implement' ' both tp_richcompare and tp_hash') added = 0 # We need at least one empty slot assert self._used < self._mask slot = _lookup(self, key) if (slot[0] == NULL): Py_INCREF(py_key) self._fill = self._fill + 1 self._used = self._used + 1 slot[0] = py_key added = 1 elif (slot[0] == _dummy): Py_INCREF(py_key) self._used = self._used + 1 slot[0] = py_key added = 1 # No else: clause. If _lookup returns a pointer to # a live object, then we already have a value at this location. retval = (slot[0]) # PySet and PyDict use a 2-3rds full algorithm, we'll follow suit if added and (self._fill * 3) >= ((self._mask + 1) * 2): # However, we always work for a load factor of 2:1 self._resize(self._used * 2) # Even if we resized and ended up moving retval into a different slot, # it is still the value that is held at the slot equivalent to 'key', # so we can still return it return retval def discard(self, key): """Remove key from the set, whether it exists or not. :return: False if the item did not exist, True if it did """ if self._discard(key): return True return False cdef int _discard(self, key) except -1: cdef PyObject **slot, *py_key slot = _lookup(self, key) if slot[0] == NULL or slot[0] == _dummy: return 0 self._used = self._used - 1 Py_DECREF(slot[0]) slot[0] = _dummy # PySet uses the heuristic: If more than 1/5 are dummies, then resize # them away # if ((so->_fill - so->_used) * 5 < so->mask) # However, we are planning on using this as an interning structure, in # which we will be putting a lot of objects. And we expect that large # groups of them are going to have the same lifetime. 
# Dummy entries hurt a little bit because they cause the lookup to keep # searching, but resizing is also rather expensive # For now, we'll just use their algorithm, but we may want to revisit # it if ((self._fill - self._used) * 5 > self._mask): self._resize(self._used * 2) return 1 def __iter__(self): return _SimpleSet_iterator(self) cdef class _SimpleSet_iterator: """Iterator over the SimpleSet structure.""" cdef Py_ssize_t pos cdef SimpleSet set cdef Py_ssize_t _used # track if things have been mutated while iterating cdef Py_ssize_t len # number of entries left def __init__(self, obj): self.set = obj self.pos = 0 self._used = self.set._used self.len = self.set._used def __iter__(self): return self def __next__(self): cdef Py_ssize_t mask, i cdef PyObject *key if self.set is None: raise StopIteration if self.set._used != self._used: # Force this exception to continue to be raised self._used = -1 raise RuntimeError("Set size changed during iteration") if not SimpleSet_Next(self.set, &self.pos, &key): self.set = None raise StopIteration # we found something the_key = key # INCREF self.len = self.len - 1 return the_key def __length_hint__(self): if self.set is not None and self._used == self.set._used: return self.len return 0 cdef api SimpleSet SimpleSet_New(): """Create a new SimpleSet object.""" return SimpleSet() cdef SimpleSet _check_self(object self): """Check that the parameter is not None. Pyrex/Cython will do type checking, but only to ensure that an object is either the right type or None. You can say "object foo not None" for pure python functions, but not for C functions. So this is just a helper for all the apis that need to do the check. """ cdef SimpleSet true_self if self is None: raise TypeError('self must not be None') true_self = self return true_self cdef PyObject **_lookup(SimpleSet self, object key) except NULL: """Find the slot where 'key' would fit. This is the same as a dicts 'lookup' function. :param key: An object we are looking up :param hash: The hash for key :return: The location in self.table where key should be put. location == NULL is an exception, but (*location) == NULL just indicates the slot is empty and can be used. """ # This uses Quadratic Probing: # http://en.wikipedia.org/wiki/Quadratic_probing # with c1 = c2 = 1/2 # This leads to probe locations at: # h0 = hash(k1) # h1 = h0 + 1 # h2 = h0 + 3 = h1 + 1 + 1 # h3 = h0 + 6 = h2 + 1 + 2 # h4 = h0 + 10 = h2 + 1 + 3 # Note that all of these are '& mask', but that is computed *after* the # offset. # This differs from the algorithm used by Set and Dict. Which, effectively, # use double-hashing, and a step size that starts large, but dwindles to # stepping one-by-one. # This gives more 'locality' in that if you have a collision at offset X, # the first fallback is X+1, which is fast to check. However, that means # that an object w/ hash X+1 will also check there, and then X+2 next. # However, for objects with differing hashes, their chains are different. # The former checks X, X+1, X+3, ... the latter checks X+1, X+2, X+4, ... # So different hashes diverge quickly. # A bigger problem is that we *only* ever use the lowest bits of the hash # So all integers (x + SIZE*N) will resolve into the same bucket, and all # use the same collision resolution. We may want to try to find a way to # incorporate the upper bits of the hash with quadratic probing. (For # example, X, X+1, X+3+some_upper_bits, X+6+more_upper_bits, etc.) 
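    # As a purely illustrative sketch (not part of the build), the probe
    # sequence used below can be reproduced in plain Python for a 16-slot
    # table and an arbitrary hash value of 39:
    #
    #   mask = 15
    #   i = 39
    #   probes = []
    #   for n_lookup in range(4):
    #       probes.append(i & mask)
    #       i = i + 1 + n_lookup
    #   # probes == [7, 8, 10, 13], i.e. offsets h0, h0+1, h0+3, h0+6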
cdef size_t i, n_lookup cdef Py_ssize_t mask cdef long key_hash cdef PyObject **table, **slot, *cur, **free_slot, *py_key py_key = key # Note: avoid using hash(obj) because of a bug w/ pyrex 0.9.8.5 and 64-bit # (it treats hash() as returning an 'int' rather than a 'long') key_hash = PyObject_Hash(py_key) i = key_hash mask = self._mask table = self._table free_slot = NULL for n_lookup from 0 <= n_lookup <= mask: # Don't loop forever slot = &table[i & mask] cur = slot[0] if cur == NULL: # Found a blank spot if free_slot != NULL: # Did we find an earlier _dummy entry? return free_slot else: return slot if cur == py_key: # Found an exact pointer to the key return slot if cur == _dummy: if free_slot == NULL: free_slot = slot elif _is_equal(py_key, key_hash, cur): # Both py_key and cur belong in this slot, return it return slot i = i + 1 + n_lookup raise AssertionError('should never get here') cdef api PyObject **_SimpleSet_Lookup(object self, object key) except NULL: """Find the slot where 'key' would fit. This is the same as a dicts 'lookup' function. This is a private api because mutating what you get without maintaing the other invariants is a 'bad thing'. :param key: An object we are looking up :param hash: The hash for key :return: The location in self._table where key should be put should never be NULL, but may reference a NULL (PyObject*) """ return _lookup(_check_self(self), key) cdef api object SimpleSet_Add(object self, object key): """Add a key to the SimpleSet (set). :param self: The SimpleSet to add the key to. :param key: The key to be added. If the key is already present, self will not be modified :return: The current key stored at the location defined by 'key'. This may be the same object, or it may be an equivalent object. (consider dict.setdefault(key, key)) """ return _check_self(self)._add(key) cdef api int SimpleSet_Contains(object self, object key) except -1: """Is key present in self?""" return (key in _check_self(self)) cdef api int SimpleSet_Discard(object self, object key) except -1: """Remove the object referenced at location 'key'. :param self: The SimpleSet being modified :param key: The key we are checking on :return: 1 if there was an object present, 0 if there was not, and -1 on error. """ return _check_self(self)._discard(key) cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL: """Get a pointer to the object present at location 'key'. This returns an object which is equal to key which was previously added to self. This returns a borrowed reference, as it may also return NULL if no value is present at that location. :param key: The value we are looking for :return: The object present at that location """ return _check_self(self)._get(key) cdef api Py_ssize_t SimpleSet_Size(object self) except -1: """Get the number of active entries in 'self'""" return _check_self(self)._used cdef api int SimpleSet_Next(object self, Py_ssize_t *pos, PyObject **key) except -1: """Walk over items in a SimpleSet. 
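
    The expected calling pattern is a loop like the one in SimpleSet_traverse
    below (shown here only as a rough sketch)::

        pos = 0
        while SimpleSet_Next(self, &pos, &key):
            ...   # 'key' now holds a borrowed reference to the next entry
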
:param pos: should be initialized to 0 by the caller, and will be updated by this function :param key: Will return a borrowed reference to key :return: 0 if nothing left, 1 if we are returning a new value """ cdef Py_ssize_t i, mask cdef SimpleSet true_self cdef PyObject **table true_self = _check_self(self) i = pos[0] if (i < 0): return 0 mask = true_self._mask table= true_self._table while (i <= mask and (table[i] == NULL or table[i] == _dummy)): i = i + 1 pos[0] = i + 1 if (i > mask): return 0 # All done if (key != NULL): key[0] = table[i] return 1 cdef int SimpleSet_traverse(SimpleSet self, visitproc visit, void *arg) except -1: """This is an implementation of 'tp_traverse' that hits the whole table. Cython/Pyrex don't seem to let you define a tp_traverse, and they only define one for you if you have an 'object' attribute. Since they don't support C arrays of objects, we access the PyObject * directly. """ cdef Py_ssize_t pos cdef PyObject *next_key cdef int ret pos = 0 while SimpleSet_Next(self, &pos, &next_key): ret = visit(next_key, arg) if ret: return ret return 0 # It is a little bit ugly to do this, but it works, and means that Meliae can # dump the total memory consumed by all child objects. (SimpleSet).tp_traverse = SimpleSet_traverse bzr-2.7.0/bzrlib/_static_tuple_c.c0000644000000000000000000007412711411560257015251 0ustar 00000000000000/* Copyright (C) 2009, 2010 Canonical Ltd * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /* Must be defined before importing _static_tuple_c.h so that we get the right * linkage. */ #define STATIC_TUPLE_MODULE #include #include "python-compat.h" #include "_static_tuple_c.h" #include "_export_c_api.h" /* Pyrex 0.9.6.4 exports _simple_set_pyx_api as * import__simple_set_pyx(), while Pyrex 0.9.8.5 and Cython 0.11.3 export them * as import_bzrlib___simple_set_pyx(). As such, we just #define one to be * equivalent to the other in our internal code. 
*/ #define import__simple_set_pyx import_bzrlib___simple_set_pyx #include "_simple_set_pyx_api.h" #if defined(__GNUC__) # define inline __inline__ #elif defined(_MSC_VER) # define inline __inline #else # define inline #endif /* The one and only StaticTuple with no values */ static StaticTuple *_empty_tuple = NULL; static PyObject *_interned_tuples = NULL; static inline int _StaticTuple_is_interned(StaticTuple *self) { return self->flags & STATIC_TUPLE_INTERNED_FLAG; } static PyObject * StaticTuple_as_tuple(StaticTuple *self) { PyObject *tpl = NULL, *obj = NULL; int i, len; len = self->size; tpl = PyTuple_New(len); if (!tpl) { /* Malloc failure */ return NULL; } for (i = 0; i < len; ++i) { obj = (PyObject *)self->items[i]; Py_INCREF(obj); PyTuple_SET_ITEM(tpl, i, obj); } return tpl; } static char StaticTuple_as_tuple_doc[] = "as_tuple() => tuple"; static StaticTuple * StaticTuple_Intern(StaticTuple *self) { PyObject *canonical_tuple = NULL; if (_interned_tuples == NULL || _StaticTuple_is_interned(self)) { Py_INCREF(self); return self; } /* SimpleSet_Add returns whatever object is present at self * or the new object if it needs to add it. */ canonical_tuple = SimpleSet_Add(_interned_tuples, (PyObject *)self); if (!canonical_tuple) { // Some sort of exception, propogate it. return NULL; } if (canonical_tuple != (PyObject *)self) { // There was already a tuple with that value return (StaticTuple *)canonical_tuple; } self->flags |= STATIC_TUPLE_INTERNED_FLAG; // The two references in the dict do not count, so that the StaticTuple // object does not become immortal just because it was interned. Py_REFCNT(self) -= 1; return self; } static char StaticTuple_Intern_doc[] = "intern() => unique StaticTuple\n" "Return a 'canonical' StaticTuple object.\n" "Similar to intern() for strings, this makes sure there\n" "is only one StaticTuple object for a given value\n." "Common usage is:\n" " key = StaticTuple('foo', 'bar').intern()\n"; static void StaticTuple_dealloc(StaticTuple *self) { int i, len; if (_StaticTuple_is_interned(self)) { /* revive dead object temporarily for Discard */ Py_REFCNT(self) = 2; if (SimpleSet_Discard(_interned_tuples, (PyObject*)self) != 1) Py_FatalError("deletion of interned StaticTuple failed"); self->flags &= ~STATIC_TUPLE_INTERNED_FLAG; } len = self->size; for (i = 0; i < len; ++i) { Py_XDECREF(self->items[i]); } Py_TYPE(self)->tp_free((PyObject *)self); } /* Similar to PyTuple_New() */ static StaticTuple * StaticTuple_New(Py_ssize_t size) { StaticTuple *stuple; if (size < 0 || size > 255) { /* Too big or too small */ PyErr_SetString(PyExc_ValueError, "StaticTuple(...)" " takes from 0 to 255 items"); return NULL; } if (size == 0 && _empty_tuple != NULL) { Py_INCREF(_empty_tuple); return _empty_tuple; } /* Note that we use PyObject_NewVar because we want to allocate a variable * width entry. However we *aren't* truly a PyVarObject because we don't * use a long for ob_size. Instead we use a plain 'size' that is an int, * and will be overloaded with flags in the future. * As such we do the alloc, and then have to clean up anything it does * incorrectly. 
*/ stuple = PyObject_NewVar(StaticTuple, &StaticTuple_Type, size); if (stuple == NULL) { return NULL; } stuple->size = size; stuple->flags = 0; stuple->_unused0 = 0; stuple->_unused1 = 0; if (size > 0) { memset(stuple->items, 0, sizeof(PyObject *) * size); } #if STATIC_TUPLE_HAS_HASH stuple->hash = -1; #endif return stuple; } static StaticTuple * StaticTuple_FromSequence(PyObject *sequence) { StaticTuple *new = NULL; PyObject *as_tuple = NULL; PyObject *item; Py_ssize_t i, size; if (StaticTuple_CheckExact(sequence)) { Py_INCREF(sequence); return (StaticTuple *)sequence; } if (!PySequence_Check(sequence)) { as_tuple = PySequence_Tuple(sequence); if (as_tuple == NULL) goto done; sequence = as_tuple; } size = PySequence_Size(sequence); if (size == -1) { goto done; } new = StaticTuple_New(size); if (new == NULL) { goto done; } for (i = 0; i < size; ++i) { // This returns a new reference, which we then 'steal' with // StaticTuple_SET_ITEM item = PySequence_GetItem(sequence, i); if (item == NULL) { Py_DECREF(new); new = NULL; goto done; } StaticTuple_SET_ITEM(new, i, item); } done: Py_XDECREF(as_tuple); return (StaticTuple *)new; } static StaticTuple * StaticTuple_from_sequence(PyObject *self, PyObject *args, PyObject *kwargs) { PyObject *sequence; if (!PyArg_ParseTuple(args, "O", &sequence)) return NULL; return StaticTuple_FromSequence(sequence); } /* Check that all items we point to are 'valid' */ static int StaticTuple_check_items(StaticTuple *self) { int i; PyObject *obj; for (i = 0; i < self->size; ++i) { obj = self->items[i]; if (obj == NULL) { PyErr_SetString(PyExc_RuntimeError, "StaticTuple(...)" " should not have a NULL entry."); return 0; } if (PyString_CheckExact(obj) || StaticTuple_CheckExact(obj) || obj == Py_None || PyBool_Check(obj) || PyInt_CheckExact(obj) || PyLong_CheckExact(obj) || PyFloat_CheckExact(obj) || PyUnicode_CheckExact(obj) ) continue; PyErr_Format(PyExc_TypeError, "StaticTuple(...)" " requires that all items are one of" " str, StaticTuple, None, bool, int, long, float, or unicode" " not %s.", Py_TYPE(obj)->tp_name); return 0; } return 1; } static PyObject * StaticTuple_new_constructor(PyTypeObject *type, PyObject *args, PyObject *kwds) { StaticTuple *self; PyObject *obj = NULL; Py_ssize_t i, len = 0; if (type != &StaticTuple_Type) { PyErr_SetString(PyExc_TypeError, "we only support creating StaticTuple"); return NULL; } if (!PyTuple_CheckExact(args)) { PyErr_SetString(PyExc_TypeError, "args must be a tuple"); return NULL; } len = PyTuple_GET_SIZE(args); if (len < 0 || len > 255) { /* Check the length here so we can raise a TypeError instead of * StaticTuple_New's ValueError. */ PyErr_SetString(PyExc_TypeError, "StaticTuple(...)" " takes from 0 to 255 items"); return NULL; } self = (StaticTuple *)StaticTuple_New(len); if (self == NULL) { return NULL; } for (i = 0; i < len; ++i) { obj = PyTuple_GET_ITEM(args, i); Py_INCREF(obj); self->items[i] = obj; } if (!StaticTuple_check_items(self)) { type->tp_dealloc((PyObject *)self); return NULL; } return (PyObject *)self; } static PyObject * StaticTuple_repr(StaticTuple *self) { PyObject *as_tuple, *tuple_repr, *result; as_tuple = StaticTuple_as_tuple(self); if (as_tuple == NULL) { return NULL; } tuple_repr = PyObject_Repr(as_tuple); Py_DECREF(as_tuple); if (tuple_repr == NULL) { return NULL; } result = PyString_FromFormat("StaticTuple%s", PyString_AsString(tuple_repr)); return result; } static long StaticTuple_hash(StaticTuple *self) { /* adapted from tuplehash(), is the specific hash value considered * 'stable'? 
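     *
     * For reference, the recurrence implemented below is (sketch form only):
     *
     *     x = 0x345678; mult = 1000003
     *     for each item:   x = (x ^ hash(item)) * mult
     *                      mult += 82520 + 2 * remaining_items
     *     x += 97531; if (x == -1) x = -2
     *
     * which mirrors the constants of the tuplehash() it was adapted from.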
*/ register long x, y; Py_ssize_t len = self->size; PyObject **p; long mult = 1000003L; #if STATIC_TUPLE_HAS_HASH if (self->hash != -1) { return self->hash; } #endif x = 0x345678L; p = self->items; // TODO: We could set specific flags if we know that, for example, all the // items are strings. I haven't seen a real-world benefit to that // yet, though. while (--len >= 0) { y = PyObject_Hash(*p++); if (y == -1) /* failure */ return -1; x = (x ^ y) * mult; /* the cast might truncate len; that doesn't change hash stability */ mult += (long)(82520L + len + len); } x += 97531L; if (x == -1) x = -2; #if STATIC_TUPLE_HAS_HASH self->hash = x; #endif return x; } static PyObject * StaticTuple_richcompare_to_tuple(StaticTuple *v, PyObject *wt, int op) { PyObject *vt; PyObject *result = NULL; vt = StaticTuple_as_tuple((StaticTuple *)v); if (vt == NULL) { goto done; } if (!PyTuple_Check(wt)) { PyErr_BadInternalCall(); goto done; } /* Now we have 2 tuples to compare, do it */ result = PyTuple_Type.tp_richcompare(vt, wt, op); done: Py_XDECREF(vt); return result; } /** Compare two objects to determine if they are equivalent. * The basic flow is as follows * 1) First make sure that both objects are StaticTuple instances. If they * aren't then cast self to a tuple, and have the tuple do the comparison. * 2) Special case comparison to Py_None, because it happens to occur fairly * often in the test suite. * 3) Special case when v and w are the same pointer. As we know the answer to * all queries without walking individual items. * 4) For all operations, we then walk the items to find the first paired * items that are not equal. * 5) If all items found are equal, we then check the length of self and * other to determine equality. * 6) If an item differs, then we apply "op" to those last two items. (eg. * StaticTuple(A, B) > StaticTuple(A, C) iff B > C) */ static PyObject * StaticTuple_richcompare(PyObject *v, PyObject *w, int op) { StaticTuple *v_st, *w_st; Py_ssize_t vlen, wlen, min_len, i; PyObject *v_obj, *w_obj; richcmpfunc string_richcompare; if (!StaticTuple_CheckExact(v)) { /* This has never triggered, according to python-dev it seems this * might trigger if '__op__' is defined but '__rop__' is not, sort of * case. Such as "None == StaticTuple()" */ fprintf(stderr, "self is not StaticTuple\n"); Py_INCREF(Py_NotImplemented); return Py_NotImplemented; } v_st = (StaticTuple *)v; if (StaticTuple_CheckExact(w)) { /* The most common case */ w_st = (StaticTuple*)w; } else if (PyTuple_Check(w)) { /* One of v or w is a tuple, so we go the 'slow' route and cast up to * tuples to compare. */ /* TODO: This seems to be triggering more than I thought it would... * We probably want to optimize comparing self to other when * other is a tuple. */ return StaticTuple_richcompare_to_tuple(v_st, w, op); } else if (w == Py_None) { // None is always less than the object switch (op) { case Py_NE:case Py_GT:case Py_GE: Py_INCREF(Py_True); return Py_True; case Py_EQ:case Py_LT:case Py_LE: Py_INCREF(Py_False); return Py_False; default: // Should never happen return Py_NotImplemented; } } else { /* We don't special case this comparison, we just let python handle * it. */ Py_INCREF(Py_NotImplemented); return Py_NotImplemented; } /* Now we know that we have 2 StaticTuple objects, so let's compare them. * This code is inspired from tuplerichcompare, except we know our * objects are limited in scope, so we can inline some comparisons. */ if (v == w) { /* Identical pointers, we can shortcut this easily. 
*/ switch (op) { case Py_EQ:case Py_LE:case Py_GE: Py_INCREF(Py_True); return Py_True; case Py_NE:case Py_LT:case Py_GT: Py_INCREF(Py_False); return Py_False; } } if (op == Py_EQ && _StaticTuple_is_interned(v_st) && _StaticTuple_is_interned(w_st)) { /* If both objects are interned, we know they are different if the * pointer is not the same, which would have been handled by the * previous if. No need to compare the entries. */ Py_INCREF(Py_False); return Py_False; } /* The only time we are likely to compare items of different lengths is in * something like the interned_keys set. However, the hash is good enough * that it is rare. Note that 'tuple_richcompare' also does not compare * lengths here. */ vlen = v_st->size; wlen = w_st->size; min_len = (vlen < wlen) ? vlen : wlen; string_richcompare = PyString_Type.tp_richcompare; for (i = 0; i < min_len; i++) { PyObject *result = NULL; v_obj = StaticTuple_GET_ITEM(v_st, i); w_obj = StaticTuple_GET_ITEM(w_st, i); if (v_obj == w_obj) { /* Shortcut case, these must be identical */ continue; } if (PyString_CheckExact(v_obj) && PyString_CheckExact(w_obj)) { result = string_richcompare(v_obj, w_obj, Py_EQ); } else if (StaticTuple_CheckExact(v_obj) && StaticTuple_CheckExact(w_obj)) { /* Both are StaticTuple types, so recurse */ result = StaticTuple_richcompare(v_obj, w_obj, Py_EQ); } else { /* Fall back to generic richcompare */ result = PyObject_RichCompare(v_obj, w_obj, Py_EQ); } if (result == NULL) { return NULL; /* There seems to be an error */ } if (result == Py_False) { // This entry is not identical, Shortcut for Py_EQ if (op == Py_EQ) { return result; } Py_DECREF(result); break; } if (result != Py_True) { /* We don't know *what* richcompare is returning, but it * isn't something we recognize */ PyErr_BadInternalCall(); Py_DECREF(result); return NULL; } Py_DECREF(result); } if (i >= min_len) { /* We walked off one of the lists, but everything compared equal so * far. Just compare the size. */ int cmp; PyObject *res; switch (op) { case Py_LT: cmp = vlen < wlen; break; case Py_LE: cmp = vlen <= wlen; break; case Py_EQ: cmp = vlen == wlen; break; case Py_NE: cmp = vlen != wlen; break; case Py_GT: cmp = vlen > wlen; break; case Py_GE: cmp = vlen >= wlen; break; default: return NULL; /* cannot happen */ } if (cmp) res = Py_True; else res = Py_False; Py_INCREF(res); return res; } /* The last item differs, shortcut the Py_NE case */ if (op == Py_NE) { Py_INCREF(Py_True); return Py_True; } /* It is some other comparison, go ahead and do the real check. 
*/ if (PyString_CheckExact(v_obj) && PyString_CheckExact(w_obj)) { return string_richcompare(v_obj, w_obj, op); } else if (StaticTuple_CheckExact(v_obj) && StaticTuple_CheckExact(w_obj)) { /* Both are StaticTuple types, so recurse */ return StaticTuple_richcompare(v_obj, w_obj, op); } else { return PyObject_RichCompare(v_obj, w_obj, op); } } static Py_ssize_t StaticTuple_length(StaticTuple *self) { return self->size; } static PyObject * StaticTuple__is_interned(StaticTuple *self) { if (_StaticTuple_is_interned(self)) { Py_INCREF(Py_True); return Py_True; } Py_INCREF(Py_False); return Py_False; } static char StaticTuple__is_interned_doc[] = "_is_interned() => True/False\n" "Check to see if this tuple has been interned.\n"; static PyObject * StaticTuple_reduce(StaticTuple *self) { PyObject *result = NULL, *as_tuple = NULL; result = PyTuple_New(2); if (!result) { return NULL; } as_tuple = StaticTuple_as_tuple(self); if (as_tuple == NULL) { Py_DECREF(result); return NULL; } Py_INCREF(&StaticTuple_Type); PyTuple_SET_ITEM(result, 0, (PyObject *)&StaticTuple_Type); PyTuple_SET_ITEM(result, 1, as_tuple); return result; } static char StaticTuple_reduce_doc[] = "__reduce__() => tuple\n"; static PyObject * StaticTuple_add(PyObject *v, PyObject *w) { Py_ssize_t i, len_v, len_w; PyObject *item; StaticTuple *result; /* StaticTuples and plain tuples may be added (concatenated) to * StaticTuples. */ if (StaticTuple_CheckExact(v)) { len_v = ((StaticTuple*)v)->size; } else if (PyTuple_Check(v)) { len_v = PyTuple_GET_SIZE(v); } else { Py_INCREF(Py_NotImplemented); return Py_NotImplemented; } if (StaticTuple_CheckExact(w)) { len_w = ((StaticTuple*)w)->size; } else if (PyTuple_Check(w)) { len_w = PyTuple_GET_SIZE(w); } else { Py_INCREF(Py_NotImplemented); return Py_NotImplemented; } result = StaticTuple_New(len_v + len_w); if (result == NULL) return NULL; for (i = 0; i < len_v; ++i) { // This returns a new reference, which we then 'steal' with // StaticTuple_SET_ITEM item = PySequence_GetItem(v, i); if (item == NULL) { Py_DECREF(result); return NULL; } StaticTuple_SET_ITEM(result, i, item); } for (i = 0; i < len_w; ++i) { item = PySequence_GetItem(w, i); if (item == NULL) { Py_DECREF(result); return NULL; } StaticTuple_SET_ITEM(result, i+len_v, item); } if (!StaticTuple_check_items(result)) { Py_DECREF(result); return NULL; } return (PyObject *)result; } static PyObject * StaticTuple_item(StaticTuple *self, Py_ssize_t offset) { PyObject *obj; /* We cast to (int) to avoid worrying about whether Py_ssize_t is a * long long, etc. offsets should never be >2**31 anyway. 
*/ if (offset < 0) { PyErr_Format(PyExc_IndexError, "StaticTuple_item does not support" " negative indices: %d\n", (int)offset); } else if (offset >= self->size) { PyErr_Format(PyExc_IndexError, "StaticTuple index out of range" " %d >= %d", (int)offset, (int)self->size); return NULL; } obj = (PyObject *)self->items[offset]; Py_INCREF(obj); return obj; } static PyObject * StaticTuple_slice(StaticTuple *self, Py_ssize_t ilow, Py_ssize_t ihigh) { PyObject *as_tuple, *result; as_tuple = StaticTuple_as_tuple(self); if (as_tuple == NULL) { return NULL; } result = PyTuple_Type.tp_as_sequence->sq_slice(as_tuple, ilow, ihigh); Py_DECREF(as_tuple); return result; } static int StaticTuple_traverse(StaticTuple *self, visitproc visit, void *arg) { Py_ssize_t i; for (i = self->size; --i >= 0;) { Py_VISIT(self->items[i]); } return 0; } static PyObject * StaticTuple_sizeof(StaticTuple *self) { Py_ssize_t res; res = _PyObject_SIZE(&StaticTuple_Type) + (int)self->size * sizeof(void*); return PyInt_FromSsize_t(res); } static char StaticTuple_doc[] = "C implementation of a StaticTuple structure." "\n This is used as StaticTuple(item1, item2, item3)" "\n This is similar to tuple, less flexible in what it" "\n supports, but also lighter memory consumption." "\n Note that the constructor mimics the () form of tuples" "\n Rather than the 'tuple()' constructor." "\n eg. StaticTuple(a, b) == (a, b) == tuple((a, b))"; static PyMethodDef StaticTuple_methods[] = { {"as_tuple", (PyCFunction)StaticTuple_as_tuple, METH_NOARGS, StaticTuple_as_tuple_doc}, {"intern", (PyCFunction)StaticTuple_Intern, METH_NOARGS, StaticTuple_Intern_doc}, {"_is_interned", (PyCFunction)StaticTuple__is_interned, METH_NOARGS, StaticTuple__is_interned_doc}, {"from_sequence", (PyCFunction)StaticTuple_from_sequence, METH_STATIC | METH_VARARGS, "Create a StaticTuple from a given sequence. This functions" " the same as the tuple() constructor."}, {"__reduce__", (PyCFunction)StaticTuple_reduce, METH_NOARGS, StaticTuple_reduce_doc}, {"__sizeof__", (PyCFunction)StaticTuple_sizeof, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; static PyNumberMethods StaticTuple_as_number = { (binaryfunc) StaticTuple_add, /* nb_add */ 0, /* nb_subtract */ 0, /* nb_multiply */ 0, /* nb_divide */ 0, /* nb_remainder */ 0, /* nb_divmod */ 0, /* nb_power */ 0, /* nb_negative */ 0, /* nb_positive */ 0, /* nb_absolute */ 0, /* nb_nonzero */ 0, /* nb_invert */ 0, /* nb_lshift */ 0, /* nb_rshift */ 0, /* nb_and */ 0, /* nb_xor */ 0, /* nb_or */ 0, /* nb_coerce */ }; static PySequenceMethods StaticTuple_as_sequence = { (lenfunc)StaticTuple_length, /* sq_length */ 0, /* sq_concat */ 0, /* sq_repeat */ (ssizeargfunc)StaticTuple_item, /* sq_item */ (ssizessizeargfunc)StaticTuple_slice, /* sq_slice */ 0, /* sq_ass_item */ 0, /* sq_ass_slice */ 0, /* sq_contains */ }; /* TODO: Implement StaticTuple_as_mapping. * The only thing we really want to support from there is mp_subscript, * so that we could support extended slicing (foo[::2]). Not worth it * yet, though. 
*/ PyTypeObject StaticTuple_Type = { PyObject_HEAD_INIT(NULL) 0, /* ob_size */ "bzrlib._static_tuple_c.StaticTuple", /* tp_name */ sizeof(StaticTuple), /* tp_basicsize */ sizeof(PyObject *), /* tp_itemsize */ (destructor)StaticTuple_dealloc, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ (reprfunc)StaticTuple_repr, /* tp_repr */ &StaticTuple_as_number, /* tp_as_number */ &StaticTuple_as_sequence, /* tp_as_sequence */ 0, /* tp_as_mapping */ (hashfunc)StaticTuple_hash, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ 0, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ /* Py_TPFLAGS_CHECKTYPES tells the number operations that they shouldn't * try to 'coerce' but instead stuff like 'add' will check it arguments. */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES, /* tp_flags*/ StaticTuple_doc, /* tp_doc */ /* gc.get_referents checks the IS_GC flag before it calls tp_traverse * And we don't include this object in the garbage collector because we * know it doesn't create cycles. However, 'meliae' will follow * tp_traverse, even if the object isn't GC, and we want that. */ (traverseproc)StaticTuple_traverse, /* tp_traverse */ 0, /* tp_clear */ StaticTuple_richcompare, /* tp_richcompare */ 0, /* tp_weaklistoffset */ // without implementing tp_iter, Python will fall back to PySequence* // which seems to work ok, we may need something faster/lighter in the // future. 0, /* tp_iter */ 0, /* tp_iternext */ StaticTuple_methods, /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ 0, /* tp_init */ 0, /* tp_alloc */ StaticTuple_new_constructor, /* tp_new */ }; static PyMethodDef static_tuple_c_methods[] = { {NULL, NULL} }; static void setup_interned_tuples(PyObject *m) { _interned_tuples = (PyObject *)SimpleSet_New(); if (_interned_tuples != NULL) { Py_INCREF(_interned_tuples); PyModule_AddObject(m, "_interned_tuples", _interned_tuples); } } static void setup_empty_tuple(PyObject *m) { StaticTuple *stuple; if (_interned_tuples == NULL) { fprintf(stderr, "You need to call setup_interned_tuples() before" " setup_empty_tuple, because we intern it.\n"); } // We need to create the empty tuple stuple = (StaticTuple *)StaticTuple_New(0); _empty_tuple = StaticTuple_Intern(stuple); assert(_empty_tuple == stuple); // At this point, refcnt is 2: 1 from New(), and 1 from the return from // intern(). We will keep 1 for the _empty_tuple global, and use the other // for the module reference. PyModule_AddObject(m, "_empty_tuple", (PyObject *)_empty_tuple); } static int _StaticTuple_CheckExact(PyObject *obj) { return StaticTuple_CheckExact(obj); } static void setup_c_api(PyObject *m) { _export_function(m, "StaticTuple_New", StaticTuple_New, "StaticTuple *(Py_ssize_t)"); _export_function(m, "StaticTuple_Intern", StaticTuple_Intern, "StaticTuple *(StaticTuple *)"); _export_function(m, "StaticTuple_FromSequence", StaticTuple_FromSequence, "StaticTuple *(PyObject *)"); _export_function(m, "_StaticTuple_CheckExact", _StaticTuple_CheckExact, "int(PyObject *)"); } static int _workaround_pyrex_096(void) { /* Work around an incompatibility in how pyrex 0.9.6 exports a module, * versus how pyrex 0.9.8 and cython 0.11 export it. * Namely 0.9.6 exports import__simple_set_pyx and tries to * "import _simple_set_pyx" but it is available only as * "import bzrlib._simple_set_pyx" * It is a shame to hack up sys.modules, but that is what we've got to do. 
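     *
     * In Python terms the workaround below is roughly equivalent to (a
     * sketch only):
     *
     *     import sys
     *     import bzrlib._simple_set_pyx as set_module
     *     sys.modules['_simple_set_pyx'] = set_module
     *     # ...and then retry the generated import_bzrlib___simple_set_pyx()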
*/ PyObject *sys_module = NULL, *modules = NULL, *set_module = NULL; int retval = -1; /* Clear out the current ImportError exception, and try again. */ PyErr_Clear(); /* Note that this only seems to work if somewhere else imports * bzrlib._simple_set_pyx before importing bzrlib._static_tuple_c */ set_module = PyImport_ImportModule("bzrlib._simple_set_pyx"); if (set_module == NULL) { goto end; } /* Add the _simple_set_pyx into sys.modules at the appropriate location. */ sys_module = PyImport_ImportModule("sys"); if (sys_module == NULL) { goto end; } modules = PyObject_GetAttrString(sys_module, "modules"); if (modules == NULL || !PyDict_Check(modules)) { goto end; } PyDict_SetItemString(modules, "_simple_set_pyx", set_module); /* Now that we have hacked it in, try the import again. */ retval = import_bzrlib___simple_set_pyx(); end: Py_XDECREF(set_module); Py_XDECREF(sys_module); Py_XDECREF(modules); return retval; } PyMODINIT_FUNC init_static_tuple_c(void) { PyObject* m; StaticTuple_Type.tp_getattro = PyObject_GenericGetAttr; if (PyType_Ready(&StaticTuple_Type) < 0) return; m = Py_InitModule3("_static_tuple_c", static_tuple_c_methods, "C implementation of a StaticTuple structure"); if (m == NULL) return; Py_INCREF(&StaticTuple_Type); PyModule_AddObject(m, "StaticTuple", (PyObject *)&StaticTuple_Type); if (import_bzrlib___simple_set_pyx() == -1 && _workaround_pyrex_096() == -1) { return; } setup_interned_tuples(m); setup_empty_tuple(m); setup_c_api(m); } // vim: tabstop=4 sw=4 expandtab bzr-2.7.0/bzrlib/_static_tuple_c.h0000644000000000000000000001015611372263013015242 0ustar 00000000000000/* Copyright (C) 2009, 2010 Canonical Ltd * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _STATIC_TUPLE_H_ #define _STATIC_TUPLE_H_ #include #include #define STATIC_TUPLE_HAS_HASH 0 /* Caching the hash adds memory, but allows us to save a little time during * lookups. TIMEIT hash(key) shows it as * 0.108usec w/ hash * 0.160usec w/o hash * Note that the entries themselves are strings, which already cache their * hashes. So while there is a 1.5:1 difference in the time for hash(), it is * already a function which is quite fast. Probably the only reason we might * want to do so, is if we customized SimpleSet to the point that the item * pointers were exactly certain types, and then accessed table[i]->hash * directly. So far StaticTuple_hash() is fast enough to not warrant the memory * difference. */ /* This defines a single variable-width key. * It is basically the same as a tuple, but * 1) Lighter weight in memory * 2) Only supports strings or other static types (that don't reference other * objects.) 
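 *
 * A minimal construction sketch for C callers (illustrative only; error
 * handling omitted, and remember that StaticTuple_SET_ITEM steals the
 * reference it is given, so INCREF borrowed objects first):
 *
 *     StaticTuple *st = StaticTuple_New(2);
 *     Py_INCREF(first);  StaticTuple_SET_ITEM(st, 0, first);
 *     Py_INCREF(second); StaticTuple_SET_ITEM(st, 1, second);
 *
 * where 'first' and 'second' stand for any PyObject pointers of the
 * supported item types.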
*/ #define STATIC_TUPLE_INTERNED_FLAG 0x01 typedef struct { PyObject_HEAD // We could go with unsigned short here, and support 64k width tuples // without any memory impact, might be worthwhile unsigned char size; unsigned char flags; unsigned char _unused0; unsigned char _unused1; // Note that on 64-bit, we actually have 4-more unused bytes // because items will always be aligned to a 64-bit boundary #if STATIC_TUPLE_HAS_HASH long hash; #endif PyObject *items[0]; } StaticTuple; extern PyTypeObject StaticTuple_Type; typedef struct { PyObject_VAR_HEAD PyObject *table[0]; } KeyIntern; #define StaticTuple_SET_ITEM(key, offset, val) \ ((((StaticTuple*)(key))->items[(offset)]) = ((PyObject *)(val))) #define StaticTuple_GET_ITEM(key, offset) (((StaticTuple*)key)->items[offset]) #define StaticTuple_GET_SIZE(key) (((StaticTuple*)key)->size) #ifdef STATIC_TUPLE_MODULE /* Used when compiling _static_tuple_c.c */ static StaticTuple * StaticTuple_New(Py_ssize_t); static StaticTuple * StaticTuple_Intern(StaticTuple *self); static StaticTuple * StaticTuple_FromSequence(PyObject *); #define StaticTuple_CheckExact(op) (Py_TYPE(op) == &StaticTuple_Type) #else /* Used as the foreign api */ #include "_import_c_api.h" static StaticTuple *(*StaticTuple_New)(Py_ssize_t); static StaticTuple *(*StaticTuple_Intern)(StaticTuple *); static StaticTuple *(*StaticTuple_FromSequence)(PyObject *); static PyTypeObject *_p_StaticTuple_Type; #define StaticTuple_CheckExact(op) (Py_TYPE(op) == _p_StaticTuple_Type) static int (*_StaticTuple_CheckExact)(PyObject *); /* Return -1 and set exception on error, 0 on success */ static int import_static_tuple_c(void) { struct function_description functions[] = { {"StaticTuple_New", (void **)&StaticTuple_New, "StaticTuple *(Py_ssize_t)"}, {"StaticTuple_Intern", (void **)&StaticTuple_Intern, "StaticTuple *(StaticTuple *)"}, {"StaticTuple_FromSequence", (void **)&StaticTuple_FromSequence, "StaticTuple *(PyObject *)"}, {"_StaticTuple_CheckExact", (void **)&_StaticTuple_CheckExact, "int(PyObject *)"}, {NULL}}; struct type_description types[] = { {"StaticTuple", &_p_StaticTuple_Type}, {NULL}}; return _import_extension_module("bzrlib._static_tuple_c", functions, types); } #endif // !STATIC_TUPLE_MODULE #endif // !_STATIC_TUPLE_H_ bzr-2.7.0/bzrlib/_static_tuple_c.pxd0000644000000000000000000000376311426274372015626 0ustar 00000000000000# Copyright (C) 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """The interface definition file for the StaticTuple class.""" cdef extern from "Python.h": ctypedef int Py_ssize_t # Required for older pyrex versions ctypedef struct PyObject: pass cdef extern from "_static_tuple_c.h": ctypedef class bzrlib._static_tuple_c.StaticTuple [object StaticTuple]: cdef unsigned char size cdef unsigned char flags cdef PyObject *items[0] # Must be called before using any of the C api, as it sets the function # pointers in memory. int import_static_tuple_c() except -1 StaticTuple StaticTuple_New(Py_ssize_t) StaticTuple StaticTuple_Intern(StaticTuple) StaticTuple StaticTuple_FromSequence(object) # Steals a reference and val must be a valid type, no checking is done void StaticTuple_SET_ITEM(StaticTuple key, Py_ssize_t offset, object val) # We would normally use PyObject * here. However it seems that cython/pyrex # treat the PyObject defined in this header as something different than one # defined in a .pyx file. And since we don't INCREF, we need a raw pointer, # not an 'object' return value. void *StaticTuple_GET_ITEM(StaticTuple key, Py_ssize_t offset) int StaticTuple_CheckExact(object) Py_ssize_t StaticTuple_GET_SIZE(StaticTuple key) bzr-2.7.0/bzrlib/_static_tuple_py.py0000644000000000000000000000541311673635356015672 0ustar 00000000000000# Copyright (C) 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """The pure-python implementation of the StaticTuple type. Note that it is generally just implemented as using tuples of tuples of strings. """ from __future__ import absolute_import class StaticTuple(tuple): """A static type, similar to a tuple of strings.""" __slots__ = () def __new__(cls, *args): # Make the empty StaticTuple a singleton if not args and _empty_tuple is not None: return _empty_tuple return tuple.__new__(cls, args) def __init__(self, *args): """Create a new 'StaticTuple'""" num_keys = len(args) if num_keys < 0 or num_keys > 255: raise TypeError('StaticTuple(...) takes from 0 to 255 items') for bit in args: if type(bit) not in (str, StaticTuple, unicode, int, long, float, None.__class__, bool): raise TypeError('StaticTuple can only point to' ' StaticTuple, str, unicode, int, long, float, bool, or' ' None not %s' % (type(bit),)) # We don't need to pass args to tuple.__init__, because that was # already handled in __new__. 
tuple.__init__(self) def __repr__(self): return '%s%s' % (self.__class__.__name__, tuple.__repr__(self)) def __reduce__(self): return (StaticTuple, tuple(self)) def __add__(self, other): """Concatenate self with other""" return StaticTuple.from_sequence(tuple.__add__(self,other)) def as_tuple(self): return tuple(self) def intern(self): return _interned_tuples.setdefault(self, self) @staticmethod def from_sequence(seq): """Convert a sequence object into a StaticTuple instance.""" if isinstance(seq, StaticTuple): # it already is return seq return StaticTuple(*seq) # Have to set it to None first, so that __new__ can determine whether # the _empty_tuple singleton has been created yet or not. _empty_tuple = None _empty_tuple = StaticTuple() _interned_tuples = {} bzr-2.7.0/bzrlib/_termcolor.py0000644000000000000000000000374712015443202014452 0ustar 00000000000000# Copyright (C) 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import import os import sys class FG(object): """Unix terminal foreground color codes (16-color).""" RED = '\033[31m' GREEN = '\033[32m' YELLOW = '\033[33m' BLUE = '\033[34m' MAGENTA = '\033[35m' CYAN = '\033[36m' WHITE = '\033[37m' # Bold Foreground BOLD_RED = '\033[1;31m' BOLD_GREEN = '\033[1;32m' BOLD_YELLOW = '\033[1;33m' BOLD_BLUE = '\033[1;34m' BOLD_MAGENTA = '\033[1;35m' BOLD_CYAN = '\033[1;36m' BOLD_WHITE = '\033[1;37m' NONE = '\033[0m' class BG(object): """Unix terminal background color codes (16-color).""" BLACK = '\033[40m' RED = '\033[41m' GREEN = '\033[42m' YELLOW = '\033[43m' BLUE = '\033[44m' MAGENTA = '\033[45m' CYAN = '\033[46m' WHITE = '\033[47m' NONE = '\033[0m' def color_string(s, fg, bg=''): return fg + bg + s + FG.NONE def re_color_string(compiled_pattern, s, fg): return compiled_pattern.sub(fg + r'\1' + FG.NONE, s) def allow_color(): if os.name != 'posix': return False if not sys.stdout.isatty(): return False try: import curses curses.setupterm() return curses.tigetnum('colors') > 2 except curses.error: return False bzr-2.7.0/bzrlib/_walkdirs_win32.pyx0000644000000000000000000002361411735017205015501 0ustar 00000000000000# Copyright (C) 2008-2012 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Helper functions for Walkdirs on win32.""" cdef extern from "python-compat.h": struct _HANDLE: pass ctypedef _HANDLE *HANDLE ctypedef unsigned long DWORD ctypedef long long __int64 ctypedef unsigned short WCHAR struct _FILETIME: DWORD dwHighDateTime DWORD dwLowDateTime ctypedef _FILETIME FILETIME struct _WIN32_FIND_DATAW: DWORD dwFileAttributes FILETIME ftCreationTime FILETIME ftLastAccessTime FILETIME ftLastWriteTime DWORD nFileSizeHigh DWORD nFileSizeLow # Some reserved stuff here WCHAR cFileName[260] # MAX_PATH WCHAR cAlternateFilename[14] # We have to use the typedef trick, otherwise pyrex uses: # struct WIN32_FIND_DATAW # which fails due to 'incomplete type' ctypedef _WIN32_FIND_DATAW WIN32_FIND_DATAW HANDLE INVALID_HANDLE_VALUE HANDLE FindFirstFileW(WCHAR *path, WIN32_FIND_DATAW *data) int FindNextFileW(HANDLE search, WIN32_FIND_DATAW *data) int FindClose(HANDLE search) DWORD FILE_ATTRIBUTE_READONLY DWORD FILE_ATTRIBUTE_DIRECTORY int ERROR_NO_MORE_FILES int GetLastError() # Wide character functions DWORD wcslen(WCHAR *) cdef extern from "Python.h": WCHAR *PyUnicode_AS_UNICODE(object) Py_ssize_t PyUnicode_GET_SIZE(object) object PyUnicode_FromUnicode(WCHAR *, Py_ssize_t) int PyList_Append(object, object) except -1 object PyUnicode_AsUTF8String(object) import operator import os import stat from bzrlib import _readdir_py cdef object osutils osutils = None cdef class _Win32Stat: """Represent a 'stat' result generated from WIN32_FIND_DATA""" cdef readonly int st_mode cdef readonly double st_ctime cdef readonly double st_mtime cdef readonly double st_atime # We can't just declare this as 'readonly' because python2.4 doesn't define # T_LONGLONG as a structure member. So instead we just use a property that # will convert it correctly anyway. cdef __int64 _st_size property st_size: def __get__(self): return self._st_size # os.stat always returns 0, so we hard code it here property st_dev: def __get__(self): return 0 property st_ino: def __get__(self): return 0 # st_uid and st_gid required for some external tools like bzr-git & dulwich property st_uid: def __get__(self): return 0 property st_gid: def __get__(self): return 0 def __repr__(self): """Repr is the same as a Stat object. 
(mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime) """ return repr((self.st_mode, 0, 0, 0, 0, 0, self.st_size, self.st_atime, self.st_mtime, self.st_ctime)) cdef object _get_name(WIN32_FIND_DATAW *data): """Extract the Unicode name for this file/dir.""" return PyUnicode_FromUnicode(data.cFileName, wcslen(data.cFileName)) cdef int _get_mode_bits(WIN32_FIND_DATAW *data): # cannot_raise cdef int mode_bits mode_bits = 0100666 # writeable file, the most common if data.dwFileAttributes & FILE_ATTRIBUTE_READONLY == FILE_ATTRIBUTE_READONLY: mode_bits = mode_bits ^ 0222 # remove the write bits if data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY == FILE_ATTRIBUTE_DIRECTORY: # Remove the FILE bit, set the DIR bit, and set the EXEC bits mode_bits = mode_bits ^ 0140111 return mode_bits cdef __int64 _get_size(WIN32_FIND_DATAW *data): # cannot_raise # Pyrex casts a DWORD into a PyLong anyway, so it is safe to do << 32 # on a DWORD return ((<__int64>data.nFileSizeHigh) << 32) + data.nFileSizeLow cdef double _ftime_to_timestamp(FILETIME *ft): # cannot_raise """Convert from a FILETIME struct into a floating point timestamp. The fields of a FILETIME structure are the hi and lo part of a 64-bit value expressed in 100 nanosecond units. 1e7 is one second in such units; 1e-7 the inverse. 429.4967296 is 2**32 / 1e7 or 2**32 * 1e-7. It also uses the epoch 1601-01-01 rather than 1970-01-01 (taken from posixmodule.c) """ cdef __int64 val # NB: This gives slightly different results versus casting to a 64-bit # integer and doing integer math before casting into a floating # point number. But the difference is in the sub millisecond range, # which doesn't seem critical here. # secs between epochs: 11,644,473,600 val = ((<__int64>ft.dwHighDateTime) << 32) + ft.dwLowDateTime return (val * 1.0e-7) - 11644473600.0 cdef int _should_skip(WIN32_FIND_DATAW *data): # cannot_raise """Is this '.' or '..' so we should skip it?""" if (data.cFileName[0] != c'.'): return 0 if data.cFileName[1] == c'\0': return 1 if data.cFileName[1] == c'.' and data.cFileName[2] == c'\0': return 1 return 0 cdef class Win32ReadDir: """Read directories on win32.""" cdef object _directory_kind cdef object _file_kind def __init__(self): self._directory_kind = _readdir_py._directory self._file_kind = _readdir_py._file def top_prefix_to_starting_dir(self, top, prefix=""): """See DirReader.top_prefix_to_starting_dir.""" global osutils if osutils is None: from bzrlib import osutils return (osutils.safe_utf8(prefix), None, None, None, osutils.safe_unicode(top)) cdef object _get_kind(self, WIN32_FIND_DATAW *data): if data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY: return self._directory_kind return self._file_kind cdef _Win32Stat _get_stat_value(self, WIN32_FIND_DATAW *data): """Get the filename and the stat information.""" cdef _Win32Stat statvalue statvalue = _Win32Stat() statvalue.st_mode = _get_mode_bits(data) statvalue.st_ctime = _ftime_to_timestamp(&data.ftCreationTime) statvalue.st_mtime = _ftime_to_timestamp(&data.ftLastWriteTime) statvalue.st_atime = _ftime_to_timestamp(&data.ftLastAccessTime) statvalue._st_size = _get_size(data) return statvalue def read_dir(self, prefix, top): """Win32 implementation of DirReader.read_dir. 
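
        Each entry in the returned dirblock has the form
        (relpath_utf8, name_utf8, kind, _Win32Stat, abspath_unicode) and the
        block is sorted by name_utf8; one entry might look roughly like this
        (illustrative values only)::

            ('lib/foo.py', 'foo.py', 'file', <_Win32Stat>, u'C:/src/lib/foo.py')
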
:seealso: DirReader.read_dir """ cdef WIN32_FIND_DATAW search_data cdef HANDLE hFindFile cdef int last_err cdef WCHAR *query cdef int result if prefix: relprefix = prefix + '/' else: relprefix = '' top_slash = top + '/' top_star = top_slash + '*' dirblock = [] query = PyUnicode_AS_UNICODE(top_star) hFindFile = FindFirstFileW(query, &search_data) if hFindFile == INVALID_HANDLE_VALUE: # Raise an exception? This path doesn't seem to exist raise WindowsError(GetLastError(), top_star) try: result = 1 while result: # Skip '.' and '..' if _should_skip(&search_data): result = FindNextFileW(hFindFile, &search_data) continue name_unicode = _get_name(&search_data) name_utf8 = PyUnicode_AsUTF8String(name_unicode) PyList_Append(dirblock, (relprefix + name_utf8, name_utf8, self._get_kind(&search_data), self._get_stat_value(&search_data), top_slash + name_unicode)) result = FindNextFileW(hFindFile, &search_data) # FindNextFileW sets GetLastError() == ERROR_NO_MORE_FILES when it # actually finishes. If we have anything else, then we have a # genuine problem last_err = GetLastError() if last_err != ERROR_NO_MORE_FILES: raise WindowsError(last_err) finally: result = FindClose(hFindFile) if result == 0: last_err = GetLastError() # TODO: We should probably raise an exception if FindClose # returns an error, however, I don't want to supress an # earlier Exception, so for now, I'm ignoring this dirblock.sort(key=operator.itemgetter(1)) return dirblock def lstat(path): """Equivalent to os.lstat, except match Win32ReadDir._get_stat_value. """ return wrap_stat(os.lstat(path)) def fstat(fd): """Like os.fstat, except match Win32ReadDir._get_stat_value :seealso: wrap_stat """ return wrap_stat(os.fstat(fd)) def wrap_stat(st): """Return a _Win32Stat object, based on the given stat result. On Windows, os.fstat(open(fname).fileno()) != os.lstat(fname). This is generally because os.lstat and os.fstat differ in what they put into st_ino and st_dev. What gets set where seems to also be dependent on the python version. So we always set it to 0 to avoid worrying about it. """ cdef _Win32Stat statvalue statvalue = _Win32Stat() statvalue.st_mode = st.st_mode statvalue.st_ctime = st.st_ctime statvalue.st_mtime = st.st_mtime statvalue.st_atime = st.st_atime statvalue._st_size = st.st_size return statvalue bzr-2.7.0/bzrlib/add.py0000644000000000000000000001234511720711656013044 0ustar 00000000000000# Copyright (C) 2005-2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Helper functions for adding files to working trees.""" from __future__ import absolute_import import sys import os from bzrlib import ( osutils, ui, ) from bzrlib.i18n import gettext class AddAction(object): """A class which defines what action to take when adding a file.""" def __init__(self, to_file=None, should_print=None): """Initialize an action which prints added files to an output stream. 
:param to_file: The stream to write into. This is expected to take Unicode paths. If not supplied, it will default to ``sys.stdout``. :param should_print: If False, printing will be suppressed. """ self._to_file = to_file if to_file is None: self._to_file = sys.stdout self.should_print = False if should_print is not None: self.should_print = should_print def __call__(self, inv, parent_ie, path, kind, _quote=osutils.quotefn): """Add path to inventory. The default action does nothing. :param inv: The inventory we are working with. :param path: The FastPath being added :param kind: The kind of the object being added. """ if self.should_print: self._to_file.write('adding %s\n' % _quote(path)) return None def skip_file(self, tree, path, kind, stat_value = None): """Test whether the given file should be skipped or not. The default action never skips. Note this is only called during recursive adds :param tree: The tree we are working in :param path: The path being added :param kind: The kind of object being added. :param stat: Stat result for this file, if available already :return bool. True if the file should be skipped (not added) """ return False class AddWithSkipLargeAction(AddAction): """A class that can decide to skip a file if it's considered too large""" _maxSize = None def skip_file(self, tree, path, kind, stat_value = None): if kind != 'file': return False opt_name = 'add.maximum_file_size' if self._maxSize is None: config = tree.get_config_stack() self._maxSize = config.get(opt_name) if stat_value is None: file_size = os.path.getsize(path); else: file_size = stat_value.st_size; if self._maxSize > 0 and file_size > self._maxSize: ui.ui_factory.show_warning(gettext( "skipping {0} (larger than {1} of {2} bytes)").format( path, opt_name, self._maxSize)) return True return False class AddFromBaseAction(AddAction): """This class will try to extract file ids from another tree.""" def __init__(self, base_tree, base_path, to_file=None, should_print=None): super(AddFromBaseAction, self).__init__(to_file=to_file, should_print=should_print) self.base_tree = base_tree self.base_path = base_path def __call__(self, inv, parent_ie, path, kind): # Place the parent call # Now check to see if we can extract an id for this file file_id, base_path = self._get_base_file_id(path, parent_ie) if file_id is not None: if self.should_print: self._to_file.write('adding %s w/ file id from %s\n' % (path, base_path)) else: # we aren't doing anything special, so let the default # reporter happen file_id = super(AddFromBaseAction, self).__call__( inv, parent_ie, path, kind) return file_id def _get_base_file_id(self, path, parent_ie): """Look for a file id in the base branch. First, if the base tree has the parent directory, we look for a file with the same name in that directory. Else, we look for an entry in the base tree with the same path. 
""" if self.base_tree.has_id(parent_ie.file_id): base_path = osutils.pathjoin( self.base_tree.id2path(parent_ie.file_id), osutils.basename(path)) base_id = self.base_tree.path2id(base_path) if base_id is not None: return (base_id, base_path) full_base_path = osutils.pathjoin(self.base_path, path) # This may return None, but it is our last attempt return self.base_tree.path2id(full_base_path), full_base_path bzr-2.7.0/bzrlib/annotate.py0000644000000000000000000004555311772323700014130 0ustar 00000000000000# Copyright (C) 2005-2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """File annotate based on weave storage""" from __future__ import absolute_import # TODO: Choice of more or less verbose formats: # # interposed: show more details between blocks of modified lines # TODO: Show which revision caused a line to merge into the parent # TODO: perhaps abbreviate timescales depending on how recent they are # e.g. "3:12 Tue", "13 Oct", "Oct 2005", etc. import sys import time from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ from bzrlib import ( patiencediff, tsort, ) """) from bzrlib import ( errors, osutils, ) from bzrlib.config import extract_email_address from bzrlib.repository import _strip_NULL_ghosts from bzrlib.revision import ( CURRENT_REVISION, Revision, ) def annotate_file_tree(tree, file_id, to_file, verbose=False, full=False, show_ids=False, branch=None): """Annotate file_id in a tree. The tree should already be read_locked() when annotate_file_tree is called. :param tree: The tree to look for revision numbers and history from. :param file_id: The file_id to annotate. :param to_file: The file to output the annotation to. :param verbose: Show all details rather than truncating to ensure reasonable text width. :param full: XXXX Not sure what this does. :param show_ids: Show revision ids in the annotation output. :param branch: Branch to use for revision revno lookups """ if branch is None: branch = tree.branch if to_file is None: to_file = sys.stdout # Handle the show_ids case annotations = list(tree.annotate_iter(file_id)) if show_ids: return _show_id_annotations(annotations, to_file, full) if not getattr(tree, "get_revision_id", False): # Create a virtual revision to represent the current tree state. # Should get some more pending commit attributes, like pending tags, # bugfixes etc. current_rev = Revision(CURRENT_REVISION) current_rev.parent_ids = tree.get_parent_ids() try: current_rev.committer = branch.get_config_stack().get('email') except errors.NoWhoami: current_rev.committer = 'local user' current_rev.message = "?" 
current_rev.timestamp = round(time.time(), 3) current_rev.timezone = osutils.local_time_offset() else: current_rev = None annotation = list(_expand_annotations(annotations, branch, current_rev)) _print_annotations(annotation, verbose, to_file, full) def _print_annotations(annotation, verbose, to_file, full): """Print annotations to to_file. :param to_file: The file to output the annotation to. :param verbose: Show all details rather than truncating to ensure reasonable text width. :param full: XXXX Not sure what this does. """ if len(annotation) == 0: max_origin_len = max_revno_len = max_revid_len = 0 else: max_origin_len = max(len(x[1]) for x in annotation) max_revno_len = max(len(x[0]) for x in annotation) max_revid_len = max(len(x[3]) for x in annotation) if not verbose: max_revno_len = min(max_revno_len, 12) max_revno_len = max(max_revno_len, 3) # Output the annotations prevanno = '' encoding = getattr(to_file, 'encoding', None) or \ osutils.get_terminal_encoding() for (revno_str, author, date_str, line_rev_id, text) in annotation: if verbose: anno = '%-*s %-*s %8s ' % (max_revno_len, revno_str, max_origin_len, author, date_str) else: if len(revno_str) > max_revno_len: revno_str = revno_str[:max_revno_len-1] + '>' anno = "%-*s %-7s " % (max_revno_len, revno_str, author[:7]) if anno.lstrip() == "" and full: anno = prevanno try: to_file.write(anno) except UnicodeEncodeError: # cmd_annotate should be passing in an 'exact' object, which means # we have a direct handle to sys.stdout or equivalent. It may not # be able to handle the exact Unicode characters, but 'annotate' is # a user function (non-scripting), so shouldn't die because of # unrepresentable annotation characters. So encode using 'replace', # and write them again. to_file.write(anno.encode(encoding, 'replace')) to_file.write('| %s\n' % (text,)) prevanno = anno def _show_id_annotations(annotations, to_file, full): if not annotations: return last_rev_id = None max_origin_len = max(len(origin) for origin, text in annotations) for origin, text in annotations: if full or last_rev_id != origin: this = origin else: this = '' to_file.write('%*s | %s' % (max_origin_len, this, text)) last_rev_id = origin return def _expand_annotations(annotations, branch, current_rev=None): """Expand a file's annotations into command line UI ready tuples. Each tuple includes detailed information, such as the author name, and date string for the commit, rather than just the revision id. :param annotations: The annotations to expand. :param revision_id_to_revno: A map from id to revision numbers. :param branch: A locked branch to query for revision details. """ repository = branch.repository if current_rev is not None: # This can probably become a function on MutableTree, get_revno_map # there, or something. last_revision = current_rev.revision_id # XXX: Partially Cloned from branch, uses the old_get_graph, eep. # XXX: The main difficulty is that we need to inject a single new node # (current_rev) into the graph before it gets numbered, etc. # Once KnownGraph gets an 'add_node()' function, we can use # VF.get_known_graph_ancestry(). 
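        # Until then: build a parent map covering just the ancestry of the
        # pending revision, graft current_rev's parent list in by hand, and
        # merge_sort the result so the existing revisions still get stable
        # dotted revnos.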
graph = repository.get_graph() revision_graph = dict(((key, value) for key, value in graph.iter_ancestry(current_rev.parent_ids) if value is not None)) revision_graph = _strip_NULL_ghosts(revision_graph) revision_graph[last_revision] = current_rev.parent_ids merge_sorted_revisions = tsort.merge_sort( revision_graph, last_revision, None, generate_revno=True) revision_id_to_revno = dict((rev_id, revno) for seq_num, rev_id, depth, revno, end_of_merge in merge_sorted_revisions) else: revision_id_to_revno = branch.get_revision_id_to_revno_map() last_origin = None revision_ids = set(o for o, t in annotations) revisions = {} if CURRENT_REVISION in revision_ids: revision_id_to_revno[CURRENT_REVISION] = ( "%d?" % (branch.revno() + 1),) revisions[CURRENT_REVISION] = current_rev revision_ids = [o for o in revision_ids if repository.has_revision(o)] revisions.update((r.revision_id, r) for r in repository.get_revisions(revision_ids)) for origin, text in annotations: text = text.rstrip('\r\n') if origin == last_origin: (revno_str, author, date_str) = ('','','') else: last_origin = origin if origin not in revisions: (revno_str, author, date_str) = ('?','?','?') else: revno_str = '.'.join(str(i) for i in revision_id_to_revno[origin]) rev = revisions[origin] tz = rev.timezone or 0 date_str = time.strftime('%Y%m%d', time.gmtime(rev.timestamp + tz)) # a lazy way to get something like the email address # TODO: Get real email address author = rev.get_apparent_authors()[0] try: author = extract_email_address(author) except errors.NoEmailInUsername: pass # use the whole name yield (revno_str, author, date_str, origin, text) def reannotate(parents_lines, new_lines, new_revision_id, _left_matching_blocks=None, heads_provider=None): """Create a new annotated version from new lines and parent annotations. :param parents_lines: List of annotated lines for all parents :param new_lines: The un-annotated new lines :param new_revision_id: The revision-id to associate with new lines (will often be CURRENT_REVISION) :param left_matching_blocks: a hint about which areas are common between the text and its left-hand-parent. The format is the SequenceMatcher.get_matching_blocks format (start_left, start_right, length_of_match). :param heads_provider: An object which provides a .heads() call to resolve if any revision ids are children of others. 
If None, then any ancestry disputes will be resolved with new_revision_id """ if len(parents_lines) == 0: lines = [(new_revision_id, line) for line in new_lines] elif len(parents_lines) == 1: lines = _reannotate(parents_lines[0], new_lines, new_revision_id, _left_matching_blocks) elif len(parents_lines) == 2: left = _reannotate(parents_lines[0], new_lines, new_revision_id, _left_matching_blocks) lines = _reannotate_annotated(parents_lines[1], new_lines, new_revision_id, left, heads_provider) else: reannotations = [_reannotate(parents_lines[0], new_lines, new_revision_id, _left_matching_blocks)] reannotations.extend(_reannotate(p, new_lines, new_revision_id) for p in parents_lines[1:]) lines = [] for annos in zip(*reannotations): origins = set(a for a, l in annos) if len(origins) == 1: # All the parents agree, so just return the first one lines.append(annos[0]) else: line = annos[0][1] if len(origins) == 2 and new_revision_id in origins: origins.remove(new_revision_id) if len(origins) == 1: lines.append((origins.pop(), line)) else: lines.append((new_revision_id, line)) return lines def _reannotate(parent_lines, new_lines, new_revision_id, matching_blocks=None): new_cur = 0 if matching_blocks is None: plain_parent_lines = [l for r, l in parent_lines] matcher = patiencediff.PatienceSequenceMatcher(None, plain_parent_lines, new_lines) matching_blocks = matcher.get_matching_blocks() lines = [] for i, j, n in matching_blocks: for line in new_lines[new_cur:j]: lines.append((new_revision_id, line)) lines.extend(parent_lines[i:i+n]) new_cur = j + n return lines def _get_matching_blocks(old, new): matcher = patiencediff.PatienceSequenceMatcher(None, old, new) return matcher.get_matching_blocks() _break_annotation_tie = None def _old_break_annotation_tie(annotated_lines): """Chose an attribution between several possible ones. :param annotated_lines: A list of tuples ((file_id, rev_id), line) where the lines are identical but the revids different while no parent relation exist between them :return : The "winning" line. This must be one with a revid that guarantees that further criss-cross merges will converge. Failing to do so have performance implications. """ # sort lexicographically so that we always get a stable result. # TODO: while 'sort' is the easiest (and nearly the only possible solution) # with the current implementation, chosing the oldest revision is known to # provide better results (as in matching user expectations). The most # common use case being manual cherry-pick from an already existing # revision. return sorted(annotated_lines)[0] def _find_matching_unannotated_lines(output_lines, plain_child_lines, child_lines, start_child, end_child, right_lines, start_right, end_right, heads_provider, revision_id): """Find lines in plain_right_lines that match the existing lines. 
:param output_lines: Append final annotated lines to this list :param plain_child_lines: The unannotated new lines for the child text :param child_lines: Lines for the child text which have been annotated for the left parent :param start_child: Position in plain_child_lines and child_lines to start the match searching :param end_child: Last position in plain_child_lines and child_lines to search for a match :param right_lines: The annotated lines for the whole text for the right parent :param start_right: Position in right_lines to start the match :param end_right: Last position in right_lines to search for a match :param heads_provider: When parents disagree on the lineage of a line, we need to check if one side supersedes the other :param revision_id: The label to give if a line should be labeled 'tip' """ output_extend = output_lines.extend output_append = output_lines.append # We need to see if any of the unannotated lines match plain_right_subset = [l for a,l in right_lines[start_right:end_right]] plain_child_subset = plain_child_lines[start_child:end_child] match_blocks = _get_matching_blocks(plain_right_subset, plain_child_subset) last_child_idx = 0 for right_idx, child_idx, match_len in match_blocks: # All the lines that don't match are just passed along if child_idx > last_child_idx: output_extend(child_lines[start_child + last_child_idx :start_child + child_idx]) for offset in xrange(match_len): left = child_lines[start_child+child_idx+offset] right = right_lines[start_right+right_idx+offset] if left[0] == right[0]: # The annotations match, just return the left one output_append(left) elif left[0] == revision_id: # The left parent marked this as unmatched, so let the # right parent claim it output_append(right) else: # Left and Right both claim this line if heads_provider is None: output_append((revision_id, left[1])) else: heads = heads_provider.heads((left[0], right[0])) if len(heads) == 1: output_append((iter(heads).next(), left[1])) else: # Both claim different origins, get a stable result. # If the result is not stable, there is a risk a # performance degradation as criss-cross merges will # flip-flop the attribution. if _break_annotation_tie is None: output_append( _old_break_annotation_tie([left, right])) else: output_append(_break_annotation_tie([left, right])) last_child_idx = child_idx + match_len def _reannotate_annotated(right_parent_lines, new_lines, new_revision_id, annotated_lines, heads_provider): """Update the annotations for a node based on another parent. :param right_parent_lines: A list of annotated lines for the right-hand parent. :param new_lines: The unannotated new lines. :param new_revision_id: The revision_id to attribute to lines which are not present in either parent. :param annotated_lines: A list of annotated lines. This should be the annotation of new_lines based on parents seen so far. :param heads_provider: When parents disagree on the lineage of a line, we need to check if one side supersedes the other. """ if len(new_lines) != len(annotated_lines): raise AssertionError("mismatched new_lines and annotated_lines") # First compare the newly annotated lines with the right annotated lines. # Lines which were not changed in left or right should match. This tends to # be the bulk of the lines, and they will need no further processing. 
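    # Ranges that do not match are either pure insertions (kept with their
    # left-hand annotation) or handed to _find_matching_unannotated_lines
    # below, which decides line by line whether the left annotation, the
    # right annotation, or new_revision_id should win.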
lines = [] lines_extend = lines.extend last_right_idx = 0 # The line just after the last match from the right side last_left_idx = 0 matching_left_and_right = _get_matching_blocks(right_parent_lines, annotated_lines) for right_idx, left_idx, match_len in matching_left_and_right: # annotated lines from last_left_idx to left_idx did not match the # lines from last_right_idx to right_idx, the raw lines should be # compared to determine what annotations need to be updated if last_right_idx == right_idx or last_left_idx == left_idx: # One of the sides is empty, so this is a pure insertion lines_extend(annotated_lines[last_left_idx:left_idx]) else: # We need to see if any of the unannotated lines match _find_matching_unannotated_lines(lines, new_lines, annotated_lines, last_left_idx, left_idx, right_parent_lines, last_right_idx, right_idx, heads_provider, new_revision_id) last_right_idx = right_idx + match_len last_left_idx = left_idx + match_len # If left and right agree on a range, just push that into the output lines_extend(annotated_lines[left_idx:left_idx + match_len]) return lines try: from bzrlib._annotator_pyx import Annotator except ImportError, e: osutils.failed_to_load_extension(e) from bzrlib._annotator_py import Annotator bzr-2.7.0/bzrlib/api.py0000644000000000000000000000727611673635356013105 0ustar 00000000000000# Copyright (C) 2007, 2008, 2009, 2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Library API versioning support. Added in bzrlib 0.18 this allows export of compatibility information about bzrlib. Please see doc/developers/api-versioning.txt for design details and examples. """ from __future__ import absolute_import import bzrlib from bzrlib.errors import IncompatibleAPI def get_current_api_version(object_with_api): """Return the API version tuple for object_with_api. :param object_with_api: An object to look for an API version on. If the object has a api_current_version attribute, that is used. Otherwise if there is a version_info attribute, its first three elements are used. Finally if there was no version_info attribute, the current api version of bzrlib itself is used. Added in bzrlib 0.18. """ try: return object_with_api.api_current_version except AttributeError: try: return object_with_api.version_info[0:3] except AttributeError: return get_current_api_version(bzrlib) def get_minimum_api_version(object_with_api): """Return the minimum API version supported by object_with_api. :param object_with_api: An object to look for an API version on. If the object has a api_minimum_version attribute, that is used. Otherwise the minimum api version of bzrlib itself is used. Added in bzrlib 0.18. 
""" try: return object_with_api.api_minimum_version except AttributeError: return get_minimum_api_version(bzrlib) def require_api(object_with_api, wanted_api): """Check if object_with_api supports the api version wanted_api. :param object_with_api: An object which exports an API minimum and current version. See get_minimum_api_version and get_current_api_version for details. :param wanted_api: The API version for which support is required. :return: None :raises IncompatibleAPI: When the wanted_api is not supported by object_with_api. Added in bzrlib 0.18. """ current = get_current_api_version(object_with_api) minimum = get_minimum_api_version(object_with_api) if wanted_api < minimum or wanted_api > current: raise IncompatibleAPI(object_with_api, wanted_api, minimum, current) def require_any_api(object_with_api, wanted_api_list): """Check if object_with_api supports the api version wanted_api. :param object_with_api: An object which exports an API minimum and current version. See get_minimum_api_version and get_current_api_version for details. :param wanted_api: A list of API versions, any of which being available is sufficent. :return: None :raises IncompatibleAPI: When the wanted_api is not supported by object_with_api. Added in bzrlib 1.9. """ for api in wanted_api_list[:-1]: try: return require_api(object_with_api, api) except IncompatibleAPI: pass require_api(object_with_api, wanted_api_list[-1]) bzr-2.7.0/bzrlib/atomicfile.py0000644000000000000000000000715211673360271014430 0ustar 00000000000000# Copyright (C) 2005, 2006, 2008, 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import import os from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ import stat import warnings from bzrlib import ( errors, osutils, symbol_versioning, ) """) # not forksafe - but we dont fork. _pid = os.getpid() _hostname = None class AtomicFile(object): """A file that does an atomic-rename to move into place. This also causes hardlinks to break when it's written out. Open this as for a regular file, then use commit() to move into place or abort() to cancel. """ __slots__ = ['tmpfilename', 'realfilename', '_fd'] def __init__(self, filename, mode='wb', new_mode=None): global _hostname self._fd = None if _hostname is None: _hostname = osutils.get_host_name() self.tmpfilename = '%s.%d.%s.%s.tmp' % (filename, _pid, _hostname, osutils.rand_chars(10)) self.realfilename = filename flags = os.O_EXCL | os.O_CREAT | os.O_WRONLY | osutils.O_NOINHERIT if mode == 'wb': flags |= osutils.O_BINARY elif mode != 'wt': raise ValueError("invalid AtomicFile mode %r" % mode) if new_mode is not None: local_mode = new_mode else: local_mode = 0666 # Use a low level fd operation to avoid chmodding later. 
# This may not succeed, but it should help most of the time self._fd = os.open(self.tmpfilename, flags, local_mode) if new_mode is not None: # Because of umask issues, we may need to chmod anyway # the common case is that we won't, though. st = os.fstat(self._fd) if stat.S_IMODE(st.st_mode) != new_mode: osutils.chmod_if_possible(self.tmpfilename, new_mode) def __repr__(self): return '%s(%r)' % (self.__class__.__name__, self.realfilename) def write(self, data): """Write some data to the file. Like file.write()""" os.write(self._fd, data) def _close_tmpfile(self, func_name): """Close the local temp file in preparation for commit or abort""" if self._fd is None: raise errors.AtomicFileAlreadyClosed(path=self.realfilename, function=func_name) fd = self._fd self._fd = None os.close(fd) def commit(self): """Close the file and move to final name.""" self._close_tmpfile('commit') osutils.rename(self.tmpfilename, self.realfilename) def abort(self): """Discard temporary file without committing changes.""" self._close_tmpfile('abort') os.remove(self.tmpfilename) def close(self): """Discard the file unless already committed.""" if self._fd is not None: self.abort() bzr-2.7.0/bzrlib/bencode.py0000644000000000000000000000213411673635356013717 0ustar 00000000000000# Copyright (C) 2007,2009 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Wrapper around the bencode pyrex and python implementation""" from __future__ import absolute_import from bzrlib import osutils try: from bzrlib._bencode_pyx import bdecode, bdecode_as_tuple, bencode, Bencached except ImportError, e: osutils.failed_to_load_extension(e) from bzrlib.util._bencode_py import bdecode, bdecode_as_tuple, bencode, Bencached bzr-2.7.0/bzrlib/bisect_multi.py0000644000000000000000000000525311673635356015010 0ustar 00000000000000# Copyright (C) 2007 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Bisection lookup multiple keys.""" from __future__ import absolute_import __all__ = [ 'bisect_multi_bytes', ] def bisect_multi_bytes(content_lookup, size, keys): """Perform bisection lookups for keys using byte based addressing. The keys are looked up via the content_lookup routine. 
The content_lookup routine gives bisect_multi_bytes information about where to keep looking up to find the data for the key, and bisect_multi_bytes feeds this back into the lookup function until the search is complete. The search is complete when the list of keys which have returned something other than -1 or +1 is empty. Keys which are not found are not returned to the caller. :param content_lookup: A callable that takes a list of (offset, key) pairs and returns a list of result tuples ((offset, key), result). Each result can be one of: -1: The key comes earlier in the content. False: The key is not present in the content. +1: The key comes later in the content. Any other value: A final result to return to the caller. :param size: The length of the content. :param keys: The keys to bisect for. :return: An iterator of the results. """ # possibly make this a generator, but a list meets the contract for now. result = [] delta = size // 2 search_keys = [(delta, key) for key in keys] while search_keys: search_results = content_lookup(search_keys) if delta > 1: delta = delta // 2 search_keys = [] for (location, key), status in search_results: if status == -1: search_keys.append((location - delta, key)) elif status == 1: search_keys.append((location + delta, key)) elif status == False: # not present, stop searching continue else: result.append((key, status)) return result bzr-2.7.0/bzrlib/branch.py0000644000000000000000000042217112147364641013555 0ustar 00000000000000# Copyright (C) 2005-2012 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import import bzrlib.bzrdir from cStringIO import StringIO from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ import itertools from bzrlib import ( bzrdir, controldir, cache_utf8, cleanup, config as _mod_config, debug, errors, fetch, graph as _mod_graph, lockdir, lockable_files, remote, repository, revision as _mod_revision, rio, shelf, tag as _mod_tag, transport, ui, urlutils, vf_search, ) from bzrlib.i18n import gettext, ngettext """) # Explicitly import bzrlib.bzrdir so that the BzrProber # is guaranteed to be registered. import bzrlib.bzrdir from bzrlib import ( bzrdir, controldir, ) from bzrlib.decorators import ( needs_read_lock, needs_write_lock, only_raises, ) from bzrlib.hooks import Hooks from bzrlib.inter import InterObject from bzrlib.lock import _RelockDebugMixin, LogicalLockResult from bzrlib import registry from bzrlib.symbol_versioning import ( deprecated_in, deprecated_method, ) from bzrlib.trace import mutter, mutter_callsite, note, is_quiet class Branch(controldir.ControlComponent): """Branch holding a history of revisions. :ivar base: Base directory/url of the branch; using control_url and control_transport is more standardized. :ivar hooks: An instance of BranchHooks. 
:ivar _master_branch_cache: cached result of get_master_branch, see _clear_cached_state. """ # this is really an instance variable - FIXME move it there # - RBC 20060112 base = None @property def control_transport(self): return self._transport @property def user_transport(self): return self.bzrdir.user_transport def __init__(self, possible_transports=None): self.tags = self._format.make_tags(self) self._revision_history_cache = None self._revision_id_to_revno_cache = None self._partial_revision_id_to_revno_cache = {} self._partial_revision_history_cache = [] self._tags_bytes = None self._last_revision_info_cache = None self._master_branch_cache = None self._merge_sorted_revisions_cache = None self._open_hook(possible_transports) hooks = Branch.hooks['open'] for hook in hooks: hook(self) def _open_hook(self, possible_transports): """Called by init to allow simpler extension of the base class.""" def _activate_fallback_location(self, url, possible_transports): """Activate the branch/repository from url as a fallback repository.""" for existing_fallback_repo in self.repository._fallback_repositories: if existing_fallback_repo.user_url == url: # This fallback is already configured. This probably only # happens because ControlDir.sprout is a horrible mess. To avoid # confusing _unstack we don't add this a second time. mutter('duplicate activation of fallback %r on %r', url, self) return repo = self._get_fallback_repository(url, possible_transports) if repo.has_same_location(self.repository): raise errors.UnstackableLocationError(self.user_url, url) self.repository.add_fallback_repository(repo) def break_lock(self): """Break a lock if one is present from another instance. Uses the ui factory to ask for confirmation if the lock may be from an active process. This will probe the repository for its lock as well. """ self.control_files.break_lock() self.repository.break_lock() master = self.get_master_branch() if master is not None: master.break_lock() def _check_stackable_repo(self): if not self.repository._format.supports_external_lookups: raise errors.UnstackableRepositoryFormat(self.repository._format, self.repository.base) def _extend_partial_history(self, stop_index=None, stop_revision=None): """Extend the partial history to include a given index If a stop_index is supplied, stop when that index has been reached. If a stop_revision is supplied, stop when that revision is encountered. Otherwise, stop when the beginning of history is reached. :param stop_index: The index which should be present. When it is present, history extension will stop. :param stop_revision: The revision id which should be present. When it is encountered, history extension will stop. """ if len(self._partial_revision_history_cache) == 0: self._partial_revision_history_cache = [self.last_revision()] repository._iter_for_revno( self.repository, self._partial_revision_history_cache, stop_index=stop_index, stop_revision=stop_revision) if self._partial_revision_history_cache[-1] == _mod_revision.NULL_REVISION: self._partial_revision_history_cache.pop() def _get_check_refs(self): """Get the references needed for check(). See bzrlib.check. """ revid = self.last_revision() return [('revision-existence', revid), ('lefthand-distance', revid)] @staticmethod def open(base, _unsupported=False, possible_transports=None): """Open the branch rooted at base. For instance, if the branch is at URL/.bzr/branch, Branch.open(URL) -> a Branch instance. 
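        A minimal usage sketch (the URL is hypothetical)::

            branch = Branch.open('http://example.com/repo/trunk')
            branch.lock_read()
            try:
                print branch.last_revision()
            finally:
                branch.unlock()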
""" control = controldir.ControlDir.open(base, possible_transports=possible_transports, _unsupported=_unsupported) return control.open_branch(unsupported=_unsupported, possible_transports=possible_transports) @staticmethod def open_from_transport(transport, name=None, _unsupported=False, possible_transports=None): """Open the branch rooted at transport""" control = controldir.ControlDir.open_from_transport(transport, _unsupported) return control.open_branch(name=name, unsupported=_unsupported, possible_transports=possible_transports) @staticmethod def open_containing(url, possible_transports=None): """Open an existing branch which contains url. This probes for a branch at url, and searches upwards from there. Basically we keep looking up until we find the control directory or run into the root. If there isn't one, raises NotBranchError. If there is one and it is either an unrecognised format or an unsupported format, UnknownFormatError or UnsupportedFormatError are raised. If there is one, it is returned, along with the unused portion of url. """ control, relpath = controldir.ControlDir.open_containing(url, possible_transports) branch = control.open_branch(possible_transports=possible_transports) return (branch, relpath) def _push_should_merge_tags(self): """Should _basic_push merge this branch's tags into the target? The default implementation returns False if this branch has no tags, and True the rest of the time. Subclasses may override this. """ return self.supports_tags() and self.tags.get_tag_dict() def get_config(self): """Get a bzrlib.config.BranchConfig for this Branch. This can then be used to get and set configuration options for the branch. :return: A bzrlib.config.BranchConfig. """ return _mod_config.BranchConfig(self) def get_config_stack(self): """Get a bzrlib.config.BranchStack for this Branch. This can then be used to get and set configuration options for the branch. :return: A bzrlib.config.BranchStack. """ return _mod_config.BranchStack(self) def _get_config(self): """Get the concrete config for just the config in this branch. This is not intended for client use; see Branch.get_config for the public API. Added in 1.14. :return: An object supporting get_option and set_option. """ raise NotImplementedError(self._get_config) def store_uncommitted(self, creator): """Store uncommitted changes from a ShelfCreator. :param creator: The ShelfCreator containing uncommitted changes, or None to delete any stored changes. :raises: ChangesAlreadyStored if the branch already has changes. """ raise NotImplementedError(self.store_uncommitted) def get_unshelver(self, tree): """Return a shelf.Unshelver for this branch and tree. :param tree: The tree to use to construct the Unshelver. :return: an Unshelver or None if no changes are stored. """ raise NotImplementedError(self.get_unshelver) def _get_fallback_repository(self, url, possible_transports): """Get the repository we fallback to at url.""" url = urlutils.join(self.base, url) a_branch = Branch.open(url, possible_transports=possible_transports) return a_branch.repository @needs_read_lock def _get_tags_bytes(self): """Get the bytes of a serialised tags dict. Note that not all branches support tags, nor do all use the same tags logic: this method is specific to BasicTags. Other tag implementations may use the same method name and behave differently, safely, because of the double-dispatch via format.make_tags->tags_instance->get_tags_dict. :return: The bytes of the tags file. :seealso: Branch._set_tags_bytes. 
""" if self._tags_bytes is None: self._tags_bytes = self._transport.get_bytes('tags') return self._tags_bytes def _get_nick(self, local=False, possible_transports=None): config = self.get_config() # explicit overrides master, but don't look for master if local is True if not local and not config.has_explicit_nickname(): try: master = self.get_master_branch(possible_transports) if master and self.user_url == master.user_url: raise errors.RecursiveBind(self.user_url) if master is not None: # return the master branch value return master.nick except errors.RecursiveBind, e: raise e except errors.BzrError, e: # Silently fall back to local implicit nick if the master is # unavailable mutter("Could not connect to bound branch, " "falling back to local nick.\n " + str(e)) return config.get_nickname() def _set_nick(self, nick): self.get_config().set_user_option('nickname', nick, warn_masked=True) nick = property(_get_nick, _set_nick) def is_locked(self): raise NotImplementedError(self.is_locked) def _lefthand_history(self, revision_id, last_rev=None, other_branch=None): if 'evil' in debug.debug_flags: mutter_callsite(4, "_lefthand_history scales with history.") # stop_revision must be a descendant of last_revision graph = self.repository.get_graph() if last_rev is not None: if not graph.is_ancestor(last_rev, revision_id): # our previous tip is not merged into stop_revision raise errors.DivergedBranches(self, other_branch) # make a new revision history from the graph parents_map = graph.get_parent_map([revision_id]) if revision_id not in parents_map: raise errors.NoSuchRevision(self, revision_id) current_rev_id = revision_id new_history = [] check_not_reserved_id = _mod_revision.check_not_reserved_id # Do not include ghosts or graph origin in revision_history while (current_rev_id in parents_map and len(parents_map[current_rev_id]) > 0): check_not_reserved_id(current_rev_id) new_history.append(current_rev_id) current_rev_id = parents_map[current_rev_id][0] parents_map = graph.get_parent_map([current_rev_id]) new_history.reverse() return new_history def lock_write(self, token=None): """Lock the branch for write operations. :param token: A token to permit reacquiring a previously held and preserved lock. :return: A BranchWriteLockResult. """ raise NotImplementedError(self.lock_write) def lock_read(self): """Lock the branch for read operations. :return: A bzrlib.lock.LogicalLockResult. """ raise NotImplementedError(self.lock_read) def unlock(self): raise NotImplementedError(self.unlock) def peek_lock_mode(self): """Return lock mode for the Branch: 'r', 'w' or None""" raise NotImplementedError(self.peek_lock_mode) def get_physical_lock_status(self): raise NotImplementedError(self.get_physical_lock_status) @needs_read_lock def dotted_revno_to_revision_id(self, revno, _cache_reverse=False): """Return the revision_id for a dotted revno. :param revno: a tuple like (1,) or (1,1,2) :param _cache_reverse: a private parameter enabling storage of the reverse mapping in a top level cache. (This should only be done in selective circumstances as we want to avoid having the mapping cached multiple times.) :return: the revision_id :raises errors.NoSuchRevision: if the revno doesn't exist """ rev_id = self._do_dotted_revno_to_revision_id(revno) if _cache_reverse: self._partial_revision_id_to_revno_cache[rev_id] = revno return rev_id def _do_dotted_revno_to_revision_id(self, revno): """Worker function for dotted_revno_to_revision_id. 
Subclasses should override this if they wish to provide a more efficient implementation. """ if len(revno) == 1: return self.get_rev_id(revno[0]) revision_id_to_revno = self.get_revision_id_to_revno_map() revision_ids = [revision_id for revision_id, this_revno in revision_id_to_revno.iteritems() if revno == this_revno] if len(revision_ids) == 1: return revision_ids[0] else: revno_str = '.'.join(map(str, revno)) raise errors.NoSuchRevision(self, revno_str) @needs_read_lock def revision_id_to_dotted_revno(self, revision_id): """Given a revision id, return its dotted revno. :return: a tuple like (1,) or (400,1,3). """ return self._do_revision_id_to_dotted_revno(revision_id) def _do_revision_id_to_dotted_revno(self, revision_id): """Worker function for revision_id_to_revno.""" # Try the caches if they are loaded result = self._partial_revision_id_to_revno_cache.get(revision_id) if result is not None: return result if self._revision_id_to_revno_cache: result = self._revision_id_to_revno_cache.get(revision_id) if result is None: raise errors.NoSuchRevision(self, revision_id) # Try the mainline as it's optimised try: revno = self.revision_id_to_revno(revision_id) return (revno,) except errors.NoSuchRevision: # We need to load and use the full revno map after all result = self.get_revision_id_to_revno_map().get(revision_id) if result is None: raise errors.NoSuchRevision(self, revision_id) return result @needs_read_lock def get_revision_id_to_revno_map(self): """Return the revision_id => dotted revno map. This will be regenerated on demand, but will be cached. :return: A dictionary mapping revision_id => dotted revno. This dictionary should not be modified by the caller. """ if self._revision_id_to_revno_cache is not None: mapping = self._revision_id_to_revno_cache else: mapping = self._gen_revno_map() self._cache_revision_id_to_revno(mapping) # TODO: jam 20070417 Since this is being cached, should we be returning # a copy? # I would rather not, and instead just declare that users should not # modify the return value. return mapping def _gen_revno_map(self): """Create a new mapping from revision ids to dotted revnos. Dotted revnos are generated based on the current tip in the revision history. This is the worker function for get_revision_id_to_revno_map, which just caches the return value. :return: A dictionary mapping revision_id => dotted revno. """ revision_id_to_revno = dict((rev_id, revno) for rev_id, depth, revno, end_of_merge in self.iter_merge_sorted_revisions()) return revision_id_to_revno @needs_read_lock def iter_merge_sorted_revisions(self, start_revision_id=None, stop_revision_id=None, stop_rule='exclude', direction='reverse'): """Walk the revisions for a branch in merge sorted order. Merge sorted order is the output from a merge-aware, topological sort, i.e. all parents come before their children going forward; the opposite for reverse. :param start_revision_id: the revision_id to begin walking from. If None, the branch tip is used. :param stop_revision_id: the revision_id to terminate the walk after. If None, the rest of history is included. 
:param stop_rule: if stop_revision_id is not None, the precise rule to use for termination: * 'exclude' - leave the stop revision out of the result (default) * 'include' - the stop revision is the last item in the result * 'with-merges' - include the stop revision and all of its merged revisions in the result * 'with-merges-without-common-ancestry' - filter out revisions that are in both ancestries :param direction: either 'reverse' or 'forward': * reverse means return the start_revision_id first, i.e. start at the most recent revision and go backwards in history * forward returns tuples in the opposite order to reverse. Note in particular that forward does *not* do any intelligent ordering w.r.t. depth as some clients of this API may like. (If required, that ought to be done at higher layers.) :return: an iterator over (revision_id, depth, revno, end_of_merge) tuples where: * revision_id: the unique id of the revision * depth: How many levels of merging deep this node has been found. * revno_sequence: This field provides a sequence of revision numbers for all revisions. The format is: (REVNO, BRANCHNUM, BRANCHREVNO). BRANCHNUM is the number of the branch that the revno is on. From left to right the REVNO numbers are the sequence numbers within that branch of the revision. * end_of_merge: When True the next node (earlier in history) is part of a different merge. """ # Note: depth and revno values are in the context of the branch so # we need the full graph to get stable numbers, regardless of the # start_revision_id. if self._merge_sorted_revisions_cache is None: last_revision = self.last_revision() known_graph = self.repository.get_known_graph_ancestry( [last_revision]) self._merge_sorted_revisions_cache = known_graph.merge_sort( last_revision) filtered = self._filter_merge_sorted_revisions( self._merge_sorted_revisions_cache, start_revision_id, stop_revision_id, stop_rule) # Make sure we don't return revisions that are not part of the # start_revision_id ancestry. filtered = self._filter_start_non_ancestors(filtered) if direction == 'reverse': return filtered if direction == 'forward': return reversed(list(filtered)) else: raise ValueError('invalid direction %r' % direction) def _filter_merge_sorted_revisions(self, merge_sorted_revisions, start_revision_id, stop_revision_id, stop_rule): """Iterate over an inclusive range of sorted revisions.""" rev_iter = iter(merge_sorted_revisions) if start_revision_id is not None: for node in rev_iter: rev_id = node.key if rev_id != start_revision_id: continue else: # The decision to include the start or not # depends on the stop_rule if a stop is provided # so pop this node back into the iterator rev_iter = itertools.chain(iter([node]), rev_iter) break if stop_revision_id is None: # Yield everything for node in rev_iter: rev_id = node.key yield (rev_id, node.merge_depth, node.revno, node.end_of_merge) elif stop_rule == 'exclude': for node in rev_iter: rev_id = node.key if rev_id == stop_revision_id: return yield (rev_id, node.merge_depth, node.revno, node.end_of_merge) elif stop_rule == 'include': for node in rev_iter: rev_id = node.key yield (rev_id, node.merge_depth, node.revno, node.end_of_merge) if rev_id == stop_revision_id: return elif stop_rule == 'with-merges-without-common-ancestry': # We want to exclude all revisions that are already part of the # stop_revision_id ancestry. 
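            # find_unique_ancestors gives the revisions reachable from
            # start_revision_id but not from stop_revision_id; anything
            # outside that set is skipped in the loop below.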
graph = self.repository.get_graph() ancestors = graph.find_unique_ancestors(start_revision_id, [stop_revision_id]) for node in rev_iter: rev_id = node.key if rev_id not in ancestors: continue yield (rev_id, node.merge_depth, node.revno, node.end_of_merge) elif stop_rule == 'with-merges': stop_rev = self.repository.get_revision(stop_revision_id) if stop_rev.parent_ids: left_parent = stop_rev.parent_ids[0] else: left_parent = _mod_revision.NULL_REVISION # left_parent is the actual revision we want to stop logging at, # since we want to show the merged revisions after the stop_rev too reached_stop_revision_id = False revision_id_whitelist = [] for node in rev_iter: rev_id = node.key if rev_id == left_parent: # reached the left parent after the stop_revision return if (not reached_stop_revision_id or rev_id in revision_id_whitelist): yield (rev_id, node.merge_depth, node.revno, node.end_of_merge) if reached_stop_revision_id or rev_id == stop_revision_id: # only do the merged revs of rev_id from now on rev = self.repository.get_revision(rev_id) if rev.parent_ids: reached_stop_revision_id = True revision_id_whitelist.extend(rev.parent_ids) else: raise ValueError('invalid stop_rule %r' % stop_rule) def _filter_start_non_ancestors(self, rev_iter): # If we started from a dotted revno, we want to consider it as a tip # and don't want to yield revisions that are not part of its # ancestry. Given the order guaranteed by the merge sort, we will see # uninteresting descendants of the first parent of our tip before the # tip itself. first = rev_iter.next() (rev_id, merge_depth, revno, end_of_merge) = first yield first if not merge_depth: # We start at a mainline revision so by definition, all others # revisions in rev_iter are ancestors for node in rev_iter: yield node clean = False whitelist = set() pmap = self.repository.get_parent_map([rev_id]) parents = pmap.get(rev_id, []) if parents: whitelist.update(parents) else: # If there is no parents, there is nothing of interest left # FIXME: It's hard to test this scenario here as this code is never # called in that case. -- vila 20100322 return for (rev_id, merge_depth, revno, end_of_merge) in rev_iter: if not clean: if rev_id in whitelist: pmap = self.repository.get_parent_map([rev_id]) parents = pmap.get(rev_id, []) whitelist.remove(rev_id) whitelist.update(parents) if merge_depth == 0: # We've reached the mainline, there is nothing left to # filter clean = True else: # A revision that is not part of the ancestry of our # starting revision. continue yield (rev_id, merge_depth, revno, end_of_merge) def leave_lock_in_place(self): """Tell this branch object not to release the physical lock when this object is unlocked. If lock_write doesn't return a token, then this method is not supported. """ self.control_files.leave_in_place() def dont_leave_lock_in_place(self): """Tell this branch object to release the physical lock when this object is unlocked, even if it didn't originally acquire it. If lock_write doesn't return a token, then this method is not supported. """ self.control_files.dont_leave_in_place() def bind(self, other): """Bind the local branch the other branch. :param other: The branch to bind to :type other: Branch """ raise errors.UpgradeRequired(self.user_url) def get_append_revisions_only(self): """Whether it is only possible to append revisions to the history. 
""" if not self._format.supports_set_append_revisions_only(): return False return self.get_config_stack().get('append_revisions_only') def set_append_revisions_only(self, enabled): if not self._format.supports_set_append_revisions_only(): raise errors.UpgradeRequired(self.user_url) self.get_config_stack().set('append_revisions_only', enabled) def set_reference_info(self, file_id, tree_path, branch_location): """Set the branch location to use for a tree reference.""" raise errors.UnsupportedOperation(self.set_reference_info, self) def get_reference_info(self, file_id): """Get the tree_path and branch_location for a tree reference.""" raise errors.UnsupportedOperation(self.get_reference_info, self) @needs_write_lock def fetch(self, from_branch, last_revision=None, limit=None): """Copy revisions from from_branch into this branch. :param from_branch: Where to copy from. :param last_revision: What revision to stop at (None for at the end of the branch. :param limit: Optional rough limit of revisions to fetch :return: None """ return InterBranch.get(from_branch, self).fetch(last_revision, limit=limit) def get_bound_location(self): """Return the URL of the branch we are bound to. Older format branches cannot bind, please be sure to use a metadir branch. """ return None def get_old_bound_location(self): """Return the URL of the branch we used to be bound to """ raise errors.UpgradeRequired(self.user_url) def get_commit_builder(self, parents, config_stack=None, timestamp=None, timezone=None, committer=None, revprops=None, revision_id=None, lossy=False): """Obtain a CommitBuilder for this branch. :param parents: Revision ids of the parents of the new revision. :param config: Optional configuration to use. :param timestamp: Optional timestamp recorded for commit. :param timezone: Optional timezone for timestamp. :param committer: Optional committer to set for commit. :param revprops: Optional dictionary of revision properties. :param revision_id: Optional revision id. :param lossy: Whether to discard data that can not be natively represented, when pushing to a foreign VCS """ if config_stack is None: config_stack = self.get_config_stack() return self.repository.get_commit_builder(self, parents, config_stack, timestamp, timezone, committer, revprops, revision_id, lossy) def get_master_branch(self, possible_transports=None): """Return the branch we are bound to. :return: Either a Branch, or None """ return None @deprecated_method(deprecated_in((2, 5, 0))) def get_revision_delta(self, revno): """Return the delta for one revision. The delta is relative to its mainline predecessor, or the empty tree for revision 1. """ try: revid = self.get_rev_id(revno) except errors.NoSuchRevision: raise errors.InvalidRevisionNumber(revno) return self.repository.get_revision_delta(revid) def get_stacked_on_url(self): """Get the URL this branch is stacked against. :raises NotStacked: If the branch is not stacked. :raises UnstackableBranchFormat: If the branch does not support stacking. """ raise NotImplementedError(self.get_stacked_on_url) def print_file(self, file, revision_id): """Print `file` to stdout.""" raise NotImplementedError(self.print_file) @needs_write_lock def set_last_revision_info(self, revno, revision_id): """Set the last revision of this branch. The caller is responsible for checking that the revno is correct for this revision id. It may be possible to set the branch last revision to an id not present in the repository. 
However, branches can also be configured to check constraints on history, in which case this may not be permitted. """ raise NotImplementedError(self.set_last_revision_info) @needs_write_lock def generate_revision_history(self, revision_id, last_rev=None, other_branch=None): """See Branch.generate_revision_history""" graph = self.repository.get_graph() (last_revno, last_revid) = self.last_revision_info() known_revision_ids = [ (last_revid, last_revno), (_mod_revision.NULL_REVISION, 0), ] if last_rev is not None: if not graph.is_ancestor(last_rev, revision_id): # our previous tip is not merged into stop_revision raise errors.DivergedBranches(self, other_branch) revno = graph.find_distance_to_null(revision_id, known_revision_ids) self.set_last_revision_info(revno, revision_id) @needs_write_lock def set_parent(self, url): """See Branch.set_parent.""" # TODO: Maybe delete old location files? # URLs should never be unicode, even on the local fs, # FIXUP this and get_parent in a future branch format bump: # read and rewrite the file. RBC 20060125 if url is not None: if isinstance(url, unicode): try: url = url.encode('ascii') except UnicodeEncodeError: raise errors.InvalidURL(url, "Urls must be 7-bit ascii, " "use bzrlib.urlutils.escape") url = urlutils.relative_url(self.base, url) self._set_parent_location(url) @needs_write_lock def set_stacked_on_url(self, url): """Set the URL this branch is stacked against. :raises UnstackableBranchFormat: If the branch does not support stacking. :raises UnstackableRepositoryFormat: If the repository does not support stacking. """ if not self._format.supports_stacking(): raise errors.UnstackableBranchFormat(self._format, self.user_url) # XXX: Changing from one fallback repository to another does not check # that all the data you need is present in the new fallback. # Possibly it should. self._check_stackable_repo() if not url: try: old_url = self.get_stacked_on_url() except (errors.NotStacked, errors.UnstackableBranchFormat, errors.UnstackableRepositoryFormat): return self._unstack() else: self._activate_fallback_location(url, possible_transports=[self.bzrdir.root_transport]) # write this out after the repository is stacked to avoid setting a # stacked config that doesn't work. self._set_config_location('stacked_on_location', url) def _unstack(self): """Change a branch to be unstacked, copying data as needed. Don't call this directly, use set_stacked_on_url(None). """ pb = ui.ui_factory.nested_progress_bar() try: pb.update(gettext("Unstacking")) # The basic approach here is to fetch the tip of the branch, # including all available ghosts, from the existing stacked # repository into a new repository object without the fallbacks. # # XXX: See - this may not be # correct for CHKMap repostiories old_repository = self.repository if len(old_repository._fallback_repositories) != 1: raise AssertionError("can't cope with fallback repositories " "of %r (fallbacks: %r)" % (old_repository, old_repository._fallback_repositories)) # Open the new repository object. # Repositories don't offer an interface to remove fallback # repositories today; take the conceptually simpler option and just # reopen it. We reopen it starting from the URL so that we # get a separate connection for RemoteRepositories and can # stream from one of them to the other. This does mean doing # separate SSH connection setup, but unstacking is not a # common operation so it's tolerable. 
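            # Reopening from the root transport URL yields a repository
            # object with an empty fallback list; its lock state is then
            # aligned with the old repository's before the final fetch.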
new_bzrdir = controldir.ControlDir.open( self.bzrdir.root_transport.base) new_repository = new_bzrdir.find_repository() if new_repository._fallback_repositories: raise AssertionError("didn't expect %r to have " "fallback_repositories" % (self.repository,)) # Replace self.repository with the new repository. # Do our best to transfer the lock state (i.e. lock-tokens and # lock count) of self.repository to the new repository. lock_token = old_repository.lock_write().repository_token self.repository = new_repository if isinstance(self, remote.RemoteBranch): # Remote branches can have a second reference to the old # repository that need to be replaced. if self._real_branch is not None: self._real_branch.repository = new_repository self.repository.lock_write(token=lock_token) if lock_token is not None: old_repository.leave_lock_in_place() old_repository.unlock() if lock_token is not None: # XXX: self.repository.leave_lock_in_place() before this # function will not be preserved. Fortunately that doesn't # affect the current default format (2a), and would be a # corner-case anyway. # - Andrew Bennetts, 2010/06/30 self.repository.dont_leave_lock_in_place() old_lock_count = 0 while True: try: old_repository.unlock() except errors.LockNotHeld: break old_lock_count += 1 if old_lock_count == 0: raise AssertionError( 'old_repository should have been locked at least once.') for i in range(old_lock_count-1): self.repository.lock_write() # Fetch from the old repository into the new. old_repository.lock_read() try: # XXX: If you unstack a branch while it has a working tree # with a pending merge, the pending-merged revisions will no # longer be present. You can (probably) revert and remerge. try: tags_to_fetch = set(self.tags.get_reverse_tag_dict()) except errors.TagsNotSupported: tags_to_fetch = set() fetch_spec = vf_search.NotInOtherForRevs(self.repository, old_repository, required_ids=[self.last_revision()], if_present_ids=tags_to_fetch, find_ghosts=True).execute() self.repository.fetch(old_repository, fetch_spec=fetch_spec) finally: old_repository.unlock() finally: pb.finished() def _set_tags_bytes(self, bytes): """Mirror method for _get_tags_bytes. :seealso: Branch._get_tags_bytes. """ op = cleanup.OperationWithCleanups(self._set_tags_bytes_locked) op.add_cleanup(self.lock_write().unlock) return op.run_simple(bytes) def _set_tags_bytes_locked(self, bytes): self._tags_bytes = bytes return self._transport.put_bytes('tags', bytes) def _cache_revision_history(self, rev_history): """Set the cached revision history to rev_history. The revision_history method will use this cache to avoid regenerating the revision history. This API is semi-public; it only for use by subclasses, all other code should consider it to be private. """ self._revision_history_cache = rev_history def _cache_revision_id_to_revno(self, revision_id_to_revno): """Set the cached revision_id => revno map to revision_id_to_revno. This API is semi-public; it only for use by subclasses, all other code should consider it to be private. """ self._revision_id_to_revno_cache = revision_id_to_revno def _clear_cached_state(self): """Clear any cached data on this branch, e.g. cached revision history. This means the next call to revision_history will need to call _gen_revision_history. This API is semi-public; it is only for use by subclasses, all other code should consider it to be private. 
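# Illustrative skeleton, not part of bzrlib and not complete enough to
# instantiate on its own: the semi-public caching hooks above are meant
# for Branch subclasses.  A hypothetical subclass only has to regenerate
# history in _gen_revision_history(); the base class's _revision_history()
# caches the result until _clear_cached_state() runs.
class _SketchListBackedBranch(Branch):
    """Hypothetical subclass whose history is a plain in-memory list."""

    def _gen_revision_history(self):
        # Regenerate from scratch; the base class stores the result via
        # _cache_revision_history(), so repeated reads stay cheap.
        return list(self._sketch_revision_ids)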
""" self._revision_history_cache = None self._revision_id_to_revno_cache = None self._last_revision_info_cache = None self._master_branch_cache = None self._merge_sorted_revisions_cache = None self._partial_revision_history_cache = [] self._partial_revision_id_to_revno_cache = {} self._tags_bytes = None def _gen_revision_history(self): """Return sequence of revision hashes on to this branch. Unlike revision_history, this method always regenerates or rereads the revision history, i.e. it does not cache the result, so repeated calls may be expensive. Concrete subclasses should override this instead of revision_history so that subclasses do not need to deal with caching logic. This API is semi-public; it only for use by subclasses, all other code should consider it to be private. """ raise NotImplementedError(self._gen_revision_history) def _revision_history(self): if 'evil' in debug.debug_flags: mutter_callsite(3, "revision_history scales with history.") if self._revision_history_cache is not None: history = self._revision_history_cache else: history = self._gen_revision_history() self._cache_revision_history(history) return list(history) def revno(self): """Return current revision number for this branch. That is equivalent to the number of revisions committed to this branch. """ return self.last_revision_info()[0] def unbind(self): """Older format branches cannot bind or unbind.""" raise errors.UpgradeRequired(self.user_url) def last_revision(self): """Return last revision id, or NULL_REVISION.""" return self.last_revision_info()[1] @needs_read_lock def last_revision_info(self): """Return information about the last revision. :return: A tuple (revno, revision_id). """ if self._last_revision_info_cache is None: self._last_revision_info_cache = self._read_last_revision_info() return self._last_revision_info_cache def _read_last_revision_info(self): raise NotImplementedError(self._read_last_revision_info) def import_last_revision_info_and_tags(self, source, revno, revid, lossy=False): """Set the last revision info, importing from another repo if necessary. This is used by the bound branch code to upload a revision to the master branch first before updating the tip of the local branch. Revisions referenced by source's tags are also transferred. 
:param source: Source branch to optionally fetch from :param revno: Revision number of the new tip :param revid: Revision id of the new tip :param lossy: Whether to discard metadata that can not be natively represented :return: Tuple with the new revision number and revision id (should only be different from the arguments when lossy=True) """ if not self.repository.has_same_location(source.repository): self.fetch(source, revid) self.set_last_revision_info(revno, revid) return (revno, revid) def revision_id_to_revno(self, revision_id): """Given a revision id, return its revno""" if _mod_revision.is_null(revision_id): return 0 history = self._revision_history() try: return history.index(revision_id) + 1 except ValueError: raise errors.NoSuchRevision(self, revision_id) @needs_read_lock def get_rev_id(self, revno, history=None): """Find the revision id of the specified revno.""" if revno == 0: return _mod_revision.NULL_REVISION last_revno, last_revid = self.last_revision_info() if revno == last_revno: return last_revid if revno <= 0 or revno > last_revno: raise errors.NoSuchRevision(self, revno) distance_from_last = last_revno - revno if len(self._partial_revision_history_cache) <= distance_from_last: self._extend_partial_history(distance_from_last) return self._partial_revision_history_cache[distance_from_last] def pull(self, source, overwrite=False, stop_revision=None, possible_transports=None, *args, **kwargs): """Mirror source into this branch. This branch is considered to be 'local', having low latency. :returns: PullResult instance """ return InterBranch.get(source, self).pull(overwrite=overwrite, stop_revision=stop_revision, possible_transports=possible_transports, *args, **kwargs) def push(self, target, overwrite=False, stop_revision=None, lossy=False, *args, **kwargs): """Mirror this branch into target. This branch is considered to be 'local', having low latency. """ return InterBranch.get(self, target).push(overwrite, stop_revision, lossy, *args, **kwargs) def basis_tree(self): """Return `Tree` object for last revision.""" return self.repository.revision_tree(self.last_revision()) def get_parent(self): """Return the parent location of the branch. This is the default location for pull/missing. The usual pattern is that the user can override it by specifying a location. """ parent = self._get_parent_location() if parent is None: return parent # This is an old-format absolute path to a local branch # turn it into a url if parent.startswith('/'): parent = urlutils.local_path_to_url(parent.decode('utf8')) try: return urlutils.join(self.base[:-1], parent) except errors.InvalidURLJoin, e: raise errors.InaccessibleParent(parent, self.user_url) def _get_parent_location(self): raise NotImplementedError(self._get_parent_location) def _set_config_location(self, name, url, config=None, make_relative=False): if config is None: config = self.get_config_stack() if url is None: url = '' elif make_relative: url = urlutils.relative_url(self.base, url) config.set(name, url) def _get_config_location(self, name, config=None): if config is None: config = self.get_config_stack() location = config.get(name) if location == '': location = None return location def get_child_submit_format(self): """Return the preferred format of submissions to this branch.""" return self.get_config_stack().get('child_submit_format') def get_submit_branch(self): """Return the submit location of the branch. This is the default location for bundle. The usual pattern is that the user can override it by specifying a location. 
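# Illustrative sketch, not part of bzrlib: mirroring with pull() and
# push() as defined above.  The branch the method is called on is treated
# as the low-latency 'local' side in both directions; the URLs are
# hypothetical.
from bzrlib import branch as _branch_mod

def _sketch_sync(local_url, remote_url):
    local = _branch_mod.Branch.open(local_url)
    remote = _branch_mod.Branch.open(remote_url)
    # Bring the local branch up to date, then publish it again.
    local.pull(remote)
    local.push(remote)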
""" return self.get_config_stack().get('submit_branch') def set_submit_branch(self, location): """Return the submit location of the branch. This is the default location for bundle. The usual pattern is that the user can override it by specifying a location. """ self.get_config_stack().set('submit_branch', location) def get_public_branch(self): """Return the public location of the branch. This is used by merge directives. """ return self._get_config_location('public_branch') def set_public_branch(self, location): """Return the submit location of the branch. This is the default location for bundle. The usual pattern is that the user can override it by specifying a location. """ self._set_config_location('public_branch', location) def get_push_location(self): """Return None or the location to push this branch to.""" return self.get_config_stack().get('push_location') def set_push_location(self, location): """Set a new push location for this branch.""" raise NotImplementedError(self.set_push_location) def _run_post_change_branch_tip_hooks(self, old_revno, old_revid): """Run the post_change_branch_tip hooks.""" hooks = Branch.hooks['post_change_branch_tip'] if not hooks: return new_revno, new_revid = self.last_revision_info() params = ChangeBranchTipParams( self, old_revno, new_revno, old_revid, new_revid) for hook in hooks: hook(params) def _run_pre_change_branch_tip_hooks(self, new_revno, new_revid): """Run the pre_change_branch_tip hooks.""" hooks = Branch.hooks['pre_change_branch_tip'] if not hooks: return old_revno, old_revid = self.last_revision_info() params = ChangeBranchTipParams( self, old_revno, new_revno, old_revid, new_revid) for hook in hooks: hook(params) @needs_write_lock def update(self): """Synchronise this branch with the master branch if any. :return: None or the last_revision pivoted out during the update. """ return None def check_revno(self, revno): """\ Check whether a revno corresponds to any revision. Zero (the NULL revision) is considered valid. """ if revno != 0: self.check_real_revno(revno) def check_real_revno(self, revno): """\ Check whether a revno corresponds to a real revision. Zero (the NULL revision) is considered invalid """ if revno < 1 or revno > self.revno(): raise errors.InvalidRevisionNumber(revno) @needs_read_lock def clone(self, to_bzrdir, revision_id=None, repository_policy=None): """Clone this branch into to_bzrdir preserving all semantic values. Most API users will want 'create_clone_on_transport', which creates a new bzrdir and branch on the fly. revision_id: if not None, the revision history in the new branch will be truncated to end with revision_id. """ result = to_bzrdir.create_branch() result.lock_write() try: if repository_policy is not None: repository_policy.configure_branch(result) self.copy_content_into(result, revision_id=revision_id) finally: result.unlock() return result @needs_read_lock def sprout(self, to_bzrdir, revision_id=None, repository_policy=None, repository=None): """Create a new line of development from the branch, into to_bzrdir. to_bzrdir controls the branch format. revision_id: if not None, the revision history in the new branch will be truncated to end with revision_id. 
""" if (repository_policy is not None and repository_policy.requires_stacking()): to_bzrdir._format.require_stacking(_skip_repo=True) result = to_bzrdir.create_branch(repository=repository) result.lock_write() try: if repository_policy is not None: repository_policy.configure_branch(result) self.copy_content_into(result, revision_id=revision_id) master_url = self.get_bound_location() if master_url is None: result.set_parent(self.bzrdir.root_transport.base) else: result.set_parent(master_url) finally: result.unlock() return result def _synchronize_history(self, destination, revision_id): """Synchronize last revision and revision history between branches. This version is most efficient when the destination is also a BzrBranch6, but works for BzrBranch5, as long as the destination's repository contains all the lefthand ancestors of the intended last_revision. If not, set_last_revision_info will fail. :param destination: The branch to copy the history into :param revision_id: The revision-id to truncate history at. May be None to copy complete history. """ source_revno, source_revision_id = self.last_revision_info() if revision_id is None: revno, revision_id = source_revno, source_revision_id else: graph = self.repository.get_graph() try: revno = graph.find_distance_to_null(revision_id, [(source_revision_id, source_revno)]) except errors.GhostRevisionsHaveNoRevno: # Default to 1, if we can't find anything else revno = 1 destination.set_last_revision_info(revno, revision_id) def copy_content_into(self, destination, revision_id=None): """Copy the content of self into destination. revision_id: if not None, the revision history in the new branch will be truncated to end with revision_id. """ return InterBranch.get(self, destination).copy_content_into( revision_id=revision_id) def update_references(self, target): if not getattr(self._format, 'supports_reference_locations', False): return reference_dict = self._get_all_reference_info() if len(reference_dict) == 0: return old_base = self.base new_base = target.base target_reference_dict = target._get_all_reference_info() for file_id, (tree_path, branch_location) in ( reference_dict.items()): branch_location = urlutils.rebase_url(branch_location, old_base, new_base) target_reference_dict.setdefault( file_id, (tree_path, branch_location)) target._set_all_reference_info(target_reference_dict) @needs_read_lock def check(self, refs): """Check consistency of the branch. In particular this checks that revisions given in the revision-history do actually match up in the revision graph, and that they're all present in the repository. Callers will typically also want to check the repository. :param refs: Calculated refs for this branch as specified by branch._get_check_refs() :return: A BranchCheckResult. """ result = BranchCheckResult(self) last_revno, last_revision_id = self.last_revision_info() actual_revno = refs[('lefthand-distance', last_revision_id)] if actual_revno != last_revno: result.errors.append(errors.BzrCheckError( 'revno does not match len(mainline) %s != %s' % ( last_revno, actual_revno))) # TODO: We should probably also check that self.revision_history # matches the repository for older branch formats. # If looking for the code that cross-checks repository parents against # the Graph.iter_lefthand_ancestry output, that is now a repository # specific check. return result def _get_checkout_format(self, lightweight=False): """Return the most suitable metadir for a checkout of this branch. Weaves are used if this branch's repository uses weaves. 
""" format = self.repository.bzrdir.checkout_metadir() format.set_branch_format(self._format) return format def create_clone_on_transport(self, to_transport, revision_id=None, stacked_on=None, create_prefix=False, use_existing_dir=False, no_tree=None): """Create a clone of this branch and its bzrdir. :param to_transport: The transport to clone onto. :param revision_id: The revision id to use as tip in the new branch. If None the tip is obtained from this branch. :param stacked_on: An optional URL to stack the clone on. :param create_prefix: Create any missing directories leading up to to_transport. :param use_existing_dir: Use an existing directory if one exists. """ # XXX: Fix the bzrdir API to allow getting the branch back from the # clone call. Or something. 20090224 RBC/spiv. # XXX: Should this perhaps clone colocated branches as well, # rather than just the default branch? 20100319 JRV if revision_id is None: revision_id = self.last_revision() dir_to = self.bzrdir.clone_on_transport(to_transport, revision_id=revision_id, stacked_on=stacked_on, create_prefix=create_prefix, use_existing_dir=use_existing_dir, no_tree=no_tree) return dir_to.open_branch() def create_checkout(self, to_location, revision_id=None, lightweight=False, accelerator_tree=None, hardlink=False): """Create a checkout of a branch. :param to_location: The url to produce the checkout at :param revision_id: The revision to check out :param lightweight: If True, produce a lightweight checkout, otherwise, produce a bound branch (heavyweight checkout) :param accelerator_tree: A tree which can be used for retrieving file contents more quickly than the revision tree, i.e. a workingtree. The revision tree will be used for cases where accelerator_tree's content is different. :param hardlink: If true, hard-link files from accelerator_tree, where possible. :return: The tree of the created checkout """ t = transport.get_transport(to_location) t.ensure_base() format = self._get_checkout_format(lightweight=lightweight) try: checkout = format.initialize_on_transport(t) except errors.AlreadyControlDirError: # It's fine if the control directory already exists, # as long as there is no existing branch and working tree. checkout = controldir.ControlDir.open_from_transport(t) try: checkout.open_branch() except errors.NotBranchError: pass else: raise errors.AlreadyControlDirError(t.base) if checkout.control_transport.base == self.bzrdir.control_transport.base: # When checking out to the same control directory, # always create a lightweight checkout lightweight = True if lightweight: from_branch = checkout.set_branch_reference(target_branch=self) else: policy = checkout.determine_repository_policy() repo = policy.acquire_repository()[0] checkout_branch = checkout.create_branch() checkout_branch.bind(self) # pull up to the specified revision_id to set the initial # branch tip correctly, and seed it with history. 
checkout_branch.pull(self, stop_revision=revision_id) from_branch = None tree = checkout.create_workingtree(revision_id, from_branch=from_branch, accelerator_tree=accelerator_tree, hardlink=hardlink) basis_tree = tree.basis_tree() basis_tree.lock_read() try: for path, file_id in basis_tree.iter_references(): reference_parent = self.reference_parent(file_id, path) reference_parent.create_checkout(tree.abspath(path), basis_tree.get_reference_revision(file_id, path), lightweight) finally: basis_tree.unlock() return tree @needs_write_lock def reconcile(self, thorough=True): """Make sure the data stored in this branch is consistent.""" from bzrlib.reconcile import BranchReconciler reconciler = BranchReconciler(self, thorough=thorough) reconciler.reconcile() return reconciler def reference_parent(self, file_id, path, possible_transports=None): """Return the parent branch for a tree-reference file_id :param file_id: The file_id of the tree reference :param path: The path of the file_id in the tree :return: A branch associated with the file_id """ # FIXME should provide multiple branches, based on config return Branch.open(self.bzrdir.root_transport.clone(path).base, possible_transports=possible_transports) def supports_tags(self): return self._format.supports_tags() def automatic_tag_name(self, revision_id): """Try to automatically find the tag name for a revision. :param revision_id: Revision id of the revision. :return: A tag name or None if no tag name could be determined. """ for hook in Branch.hooks['automatic_tag_name']: ret = hook(self, revision_id) if ret is not None: return ret return None def _check_if_descendant_or_diverged(self, revision_a, revision_b, graph, other_branch): """Ensure that revision_b is a descendant of revision_a. This is a helper function for update_revisions. :raises: DivergedBranches if revision_b has diverged from revision_a. :returns: True if revision_b is a descendant of revision_a. """ relation = self._revision_relations(revision_a, revision_b, graph) if relation == 'b_descends_from_a': return True elif relation == 'diverged': raise errors.DivergedBranches(self, other_branch) elif relation == 'a_descends_from_b': return False else: raise AssertionError("invalid relation: %r" % (relation,)) def _revision_relations(self, revision_a, revision_b, graph): """Determine the relationship between two revisions. :returns: One of: 'a_descends_from_b', 'b_descends_from_a', 'diverged' """ heads = graph.heads([revision_a, revision_b]) if heads == set([revision_b]): return 'b_descends_from_a' elif heads == set([revision_a, revision_b]): # These branches have diverged return 'diverged' elif heads == set([revision_a]): return 'a_descends_from_b' else: raise AssertionError("invalid heads: %r" % (heads,)) def heads_to_fetch(self): """Return the heads that must and that should be fetched to copy this branch into another repo. :returns: a 2-tuple of (must_fetch, if_present_fetch). must_fetch is a set of heads that must be fetched. if_present_fetch is a set of heads that must be fetched if present, but no error is necessary if they are not present. """ # For bzr native formats must_fetch is just the tip, and # if_present_fetch are the tags. 
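# Illustrative sketch, not part of bzrlib: inspecting what a fetch from
# this branch would need to copy, using heads_to_fetch() described above.
# The URL is hypothetical.
from bzrlib import branch as _branch_mod

def _sketch_fetch_plan(url):
    b = _branch_mod.Branch.open(url)
    b.lock_read()
    try:
        must_fetch, if_present_fetch = b.heads_to_fetch()
        # The tip must always be copied; tag targets are best-effort.
        return sorted(must_fetch), sorted(if_present_fetch)
    finally:
        b.unlock()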
must_fetch = set([self.last_revision()]) if_present_fetch = set() if self.get_config_stack().get('branch.fetch_tags'): try: if_present_fetch = set(self.tags.get_reverse_tag_dict()) except errors.TagsNotSupported: pass must_fetch.discard(_mod_revision.NULL_REVISION) if_present_fetch.discard(_mod_revision.NULL_REVISION) return must_fetch, if_present_fetch class BranchFormat(controldir.ControlComponentFormat): """An encapsulation of the initialization and open routines for a format. Formats provide three things: * An initialization routine, * a format description * an open routine. Formats are placed in an dict by their format string for reference during branch opening. It's not required that these be instances, they can be classes themselves with class methods - it simply depends on whether state is needed for a given format or not. Once a format is deprecated, just deprecate the initialize and open methods on the format class. Do not deprecate the object, as the object will be created every time regardless. """ def __eq__(self, other): return self.__class__ is other.__class__ def __ne__(self, other): return not (self == other) def get_reference(self, controldir, name=None): """Get the target reference of the branch in controldir. format probing must have been completed before calling this method - it is assumed that the format of the branch in controldir is correct. :param controldir: The controldir to get the branch data from. :param name: Name of the colocated branch to fetch :return: None if the branch is not a reference branch. """ return None @classmethod def set_reference(self, controldir, name, to_branch): """Set the target reference of the branch in controldir. format probing must have been completed before calling this method - it is assumed that the format of the branch in controldir is correct. :param controldir: The controldir to set the branch reference for. :param name: Name of colocated branch to set, None for default :param to_branch: branch that the checkout is to reference """ raise NotImplementedError(self.set_reference) def get_format_description(self): """Return the short format description for this format.""" raise NotImplementedError(self.get_format_description) def _run_post_branch_init_hooks(self, controldir, name, branch): hooks = Branch.hooks['post_branch_init'] if not hooks: return params = BranchInitHookParams(self, controldir, name, branch) for hook in hooks: hook(params) def initialize(self, controldir, name=None, repository=None, append_revisions_only=None): """Create a branch of this format in controldir. :param name: Name of the colocated branch to create. """ raise NotImplementedError(self.initialize) def is_supported(self): """Is this format supported? Supported formats can be initialized and opened. Unsupported formats may not support initialization or committing or some other features depending on the reason for not being supported. """ return True def make_tags(self, branch): """Create a tags object for branch. This method is on BranchFormat, because BranchFormats are reflected over the wire via network_name(), whereas full Branch instances require multiple VFS method calls to operate at all. The default implementation returns a disabled-tags instance. Note that it is normal for branch to be a RemoteBranch when using tags on a RemoteBranch. """ return _mod_tag.DisabledTags(branch) def network_name(self): """A simple byte string uniquely identifying this format for RPC calls. 
MetaDir branch formats use their disk format string to identify the repository over the wire. All in one formats such as bzr < 0.8, and foreign formats like svn/git and hg should use some marker which is unique and immutable. """ raise NotImplementedError(self.network_name) def open(self, controldir, name=None, _found=False, ignore_fallbacks=False, found_repository=None, possible_transports=None): """Return the branch object for controldir. :param controldir: A ControlDir that contains a branch. :param name: Name of colocated branch to open :param _found: a private parameter, do not use it. It is used to indicate if format probing has already be done. :param ignore_fallbacks: when set, no fallback branches will be opened (if there are any). Default is to open fallbacks. """ raise NotImplementedError(self.open) def supports_set_append_revisions_only(self): """True if this format supports set_append_revisions_only.""" return False def supports_stacking(self): """True if this format records a stacked-on branch.""" return False def supports_leaving_lock(self): """True if this format supports leaving locks in place.""" return False # by default def __str__(self): return self.get_format_description().rstrip() def supports_tags(self): """True if this format supports tags stored in the branch""" return False # by default def tags_are_versioned(self): """Whether the tag container for this branch versions tags.""" return False def supports_tags_referencing_ghosts(self): """True if tags can reference ghost revisions.""" return True class MetaDirBranchFormatFactory(registry._LazyObjectGetter): """A factory for a BranchFormat object, permitting simple lazy registration. While none of the built in BranchFormats are lazy registered yet, bzrlib.tests.test_branch.TestMetaDirBranchFormatFactory demonstrates how to use it, and the bzr-loom plugin uses it as well (see bzrlib.plugins.loom.formats). """ def __init__(self, format_string, module_name, member_name): """Create a MetaDirBranchFormatFactory. :param format_string: The format string the format has. :param module_name: Module to load the format class from. :param member_name: Attribute name within the module for the format class. """ registry._LazyObjectGetter.__init__(self, module_name, member_name) self._format_string = format_string def get_format_string(self): """See BranchFormat.get_format_string.""" return self._format_string def __call__(self): """Used for network_format_registry support.""" return self.get_obj()() class BranchHooks(Hooks): """A dictionary mapping hook name to a list of callables for branch hooks. e.g. ['post_push'] Is the list of items to be called when the push function is invoked. """ def __init__(self): """Create the default hooks. These are all empty initially, because by default nothing should get notified. """ Hooks.__init__(self, "bzrlib.branch", "Branch.hooks") self.add_hook('open', "Called with the Branch object that has been opened after a " "branch is opened.", (1, 8)) self.add_hook('post_push', "Called after a push operation completes. post_push is called " "with a bzrlib.branch.BranchPushResult object and only runs in the " "bzr client.", (0, 15)) self.add_hook('post_pull', "Called after a pull operation completes. post_pull is called " "with a bzrlib.branch.PullResult object and only runs in the " "bzr client.", (0, 15)) self.add_hook('pre_commit', "Called after a commit is calculated but before it is " "completed. 
pre_commit is called with (local, master, old_revno, " "old_revid, future_revno, future_revid, tree_delta, future_tree" "). old_revid is NULL_REVISION for the first commit to a branch, " "tree_delta is a TreeDelta object describing changes from the " "basis revision. hooks MUST NOT modify this delta. " " future_tree is an in-memory tree obtained from " "CommitBuilder.revision_tree() and hooks MUST NOT modify this " "tree.", (0,91)) self.add_hook('post_commit', "Called in the bzr client after a commit has completed. " "post_commit is called with (local, master, old_revno, old_revid, " "new_revno, new_revid). old_revid is NULL_REVISION for the first " "commit to a branch.", (0, 15)) self.add_hook('post_uncommit', "Called in the bzr client after an uncommit completes. " "post_uncommit is called with (local, master, old_revno, " "old_revid, new_revno, new_revid) where local is the local branch " "or None, master is the target branch, and an empty branch " "receives new_revno of 0, new_revid of None.", (0, 15)) self.add_hook('pre_change_branch_tip', "Called in bzr client and server before a change to the tip of a " "branch is made. pre_change_branch_tip is called with a " "bzrlib.branch.ChangeBranchTipParams. Note that push, pull, " "commit, uncommit will all trigger this hook.", (1, 6)) self.add_hook('post_change_branch_tip', "Called in bzr client and server after a change to the tip of a " "branch is made. post_change_branch_tip is called with a " "bzrlib.branch.ChangeBranchTipParams. Note that push, pull, " "commit, uncommit will all trigger this hook.", (1, 4)) self.add_hook('transform_fallback_location', "Called when a stacked branch is activating its fallback " "locations. transform_fallback_location is called with (branch, " "url), and should return a new url. Returning the same url " "allows it to be used as-is, returning a different one can be " "used to cause the branch to stack on a closer copy of that " "fallback_location. Note that the branch cannot have history " "accessing methods called on it during this hook because the " "fallback locations have not been activated. When there are " "multiple hooks installed for transform_fallback_location, " "all are called with the url returned from the previous hook." "The order is however undefined.", (1, 9)) self.add_hook('automatic_tag_name', "Called to determine an automatic tag name for a revision. " "automatic_tag_name is called with (branch, revision_id) and " "should return a tag name or None if no tag name could be " "determined. The first non-None tag name returned will be used.", (2, 2)) self.add_hook('post_branch_init', "Called after new branch initialization completes. " "post_branch_init is called with a " "bzrlib.branch.BranchInitHookParams. " "Note that init, branch and checkout (both heavyweight and " "lightweight) will all trigger this hook.", (2, 2)) self.add_hook('post_switch', "Called after a checkout switches branch. " "post_switch is called with a " "bzrlib.branch.SwitchHookParams.", (2, 2)) # install the default hooks into the Branch class. Branch.hooks = BranchHooks() class ChangeBranchTipParams(object): """Object holding parameters passed to `*_change_branch_tip` hooks. There are 5 fields that hooks may wish to access: :ivar branch: the branch being changed :ivar old_revno: revision number before the change :ivar new_revno: revision number after the change :ivar old_revid: revision id before the change :ivar new_revid: revision id after the change The revid fields are strings. The revno fields are integers. 
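# Illustrative sketch, not part of bzrlib: a plugin would typically react
# to the hook points registered above via Hooks.install_named_hook().
# The hook body and its label are hypothetical.
from bzrlib import branch as _branch_mod, trace as _trace_mod

def _sketch_post_push(push_result):
    # post_push receives a BranchPushResult in the bzr client.
    _trace_mod.note('pushed %s', push_result.target_branch.base)

_branch_mod.Branch.hooks.install_named_hook(
    'post_push', _sketch_post_push, 'sketch: log pushes')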
""" def __init__(self, branch, old_revno, new_revno, old_revid, new_revid): """Create a group of ChangeBranchTip parameters. :param branch: The branch being changed. :param old_revno: Revision number before the change. :param new_revno: Revision number after the change. :param old_revid: Tip revision id before the change. :param new_revid: Tip revision id after the change. """ self.branch = branch self.old_revno = old_revno self.new_revno = new_revno self.old_revid = old_revid self.new_revid = new_revid def __eq__(self, other): return self.__dict__ == other.__dict__ def __repr__(self): return "<%s of %s from (%s, %s) to (%s, %s)>" % ( self.__class__.__name__, self.branch, self.old_revno, self.old_revid, self.new_revno, self.new_revid) class BranchInitHookParams(object): """Object holding parameters passed to `*_branch_init` hooks. There are 4 fields that hooks may wish to access: :ivar format: the branch format :ivar bzrdir: the ControlDir where the branch will be/has been initialized :ivar name: name of colocated branch, if any (or None) :ivar branch: the branch created Note that for lightweight checkouts, the bzrdir and format fields refer to the checkout, hence they are different from the corresponding fields in branch, which refer to the original branch. """ def __init__(self, format, controldir, name, branch): """Create a group of BranchInitHook parameters. :param format: the branch format :param controldir: the ControlDir where the branch will be/has been initialized :param name: name of colocated branch, if any (or None) :param branch: the branch created Note that for lightweight checkouts, the bzrdir and format fields refer to the checkout, hence they are different from the corresponding fields in branch, which refer to the original branch. """ self.format = format self.bzrdir = controldir self.name = name self.branch = branch def __eq__(self, other): return self.__dict__ == other.__dict__ def __repr__(self): return "<%s of %s>" % (self.__class__.__name__, self.branch) class SwitchHookParams(object): """Object holding parameters passed to `*_switch` hooks. There are 4 fields that hooks may wish to access: :ivar control_dir: ControlDir of the checkout to change :ivar to_branch: branch that the checkout is to reference :ivar force: skip the check for local commits in a heavy checkout :ivar revision_id: revision ID to switch to (or None) """ def __init__(self, control_dir, to_branch, force, revision_id): """Create a group of SwitchHook parameters. :param control_dir: ControlDir of the checkout to change :param to_branch: branch that the checkout is to reference :param force: skip the check for local commits in a heavy checkout :param revision_id: revision ID to switch to (or None) """ self.control_dir = control_dir self.to_branch = to_branch self.force = force self.revision_id = revision_id def __eq__(self, other): return self.__dict__ == other.__dict__ def __repr__(self): return "<%s for %s to (%s, %s)>" % (self.__class__.__name__, self.control_dir, self.to_branch, self.revision_id) class BranchFormatMetadir(bzrdir.BzrFormat, BranchFormat): """Base class for branch formats that live in meta directories. 
""" def __init__(self): BranchFormat.__init__(self) bzrdir.BzrFormat.__init__(self) @classmethod def find_format(klass, controldir, name=None): """Return the format for the branch object in controldir.""" try: transport = controldir.get_branch_transport(None, name=name) except errors.NoSuchFile: raise errors.NotBranchError(path=name, bzrdir=controldir) try: format_string = transport.get_bytes("format") except errors.NoSuchFile: raise errors.NotBranchError(path=transport.base, bzrdir=controldir) return klass._find_format(format_registry, 'branch', format_string) def _branch_class(self): """What class to instantiate on open calls.""" raise NotImplementedError(self._branch_class) def _get_initial_config(self, append_revisions_only=None): if append_revisions_only: return "append_revisions_only = True\n" else: # Avoid writing anything if append_revisions_only is disabled, # as that is the default. return "" def _initialize_helper(self, a_bzrdir, utf8_files, name=None, repository=None): """Initialize a branch in a control dir, with specified files :param a_bzrdir: The bzrdir to initialize the branch in :param utf8_files: The files to create as a list of (filename, content) tuples :param name: Name of colocated branch to create, if any :return: a branch in this format """ if name is None: name = a_bzrdir._get_selected_branch() mutter('creating branch %r in %s', self, a_bzrdir.user_url) branch_transport = a_bzrdir.get_branch_transport(self, name=name) control_files = lockable_files.LockableFiles(branch_transport, 'lock', lockdir.LockDir) control_files.create_lock() control_files.lock_write() try: utf8_files += [('format', self.as_string())] for (filename, content) in utf8_files: branch_transport.put_bytes( filename, content, mode=a_bzrdir._get_file_mode()) finally: control_files.unlock() branch = self.open(a_bzrdir, name, _found=True, found_repository=repository) self._run_post_branch_init_hooks(a_bzrdir, name, branch) return branch def open(self, a_bzrdir, name=None, _found=False, ignore_fallbacks=False, found_repository=None, possible_transports=None): """See BranchFormat.open().""" if name is None: name = a_bzrdir._get_selected_branch() if not _found: format = BranchFormatMetadir.find_format(a_bzrdir, name=name) if format.__class__ != self.__class__: raise AssertionError("wrong format %r found for %r" % (format, self)) transport = a_bzrdir.get_branch_transport(None, name=name) try: control_files = lockable_files.LockableFiles(transport, 'lock', lockdir.LockDir) if found_repository is None: found_repository = a_bzrdir.find_repository() return self._branch_class()(_format=self, _control_files=control_files, name=name, a_bzrdir=a_bzrdir, _repository=found_repository, ignore_fallbacks=ignore_fallbacks, possible_transports=possible_transports) except errors.NoSuchFile: raise errors.NotBranchError(path=transport.base, bzrdir=a_bzrdir) @property def _matchingbzrdir(self): ret = bzrdir.BzrDirMetaFormat1() ret.set_branch_format(self) return ret def supports_tags(self): return True def supports_leaving_lock(self): return True def check_support_status(self, allow_unsupported, recommend_upgrade=True, basedir=None): BranchFormat.check_support_status(self, allow_unsupported=allow_unsupported, recommend_upgrade=recommend_upgrade, basedir=basedir) bzrdir.BzrFormat.check_support_status(self, allow_unsupported=allow_unsupported, recommend_upgrade=recommend_upgrade, basedir=basedir) class BzrBranchFormat6(BranchFormatMetadir): """Branch format with last-revision and tags. 
Unlike previous formats, this has no explicit revision history. Instead, this just stores the last-revision, and the left-hand history leading up to there is the history. This format was introduced in bzr 0.15 and became the default in 0.91. """ def _branch_class(self): return BzrBranch6 @classmethod def get_format_string(cls): """See BranchFormat.get_format_string().""" return "Bazaar Branch Format 6 (bzr 0.15)\n" def get_format_description(self): """See BranchFormat.get_format_description().""" return "Branch format 6" def initialize(self, a_bzrdir, name=None, repository=None, append_revisions_only=None): """Create a branch of this format in a_bzrdir.""" utf8_files = [('last-revision', '0 null:\n'), ('branch.conf', self._get_initial_config(append_revisions_only)), ('tags', ''), ] return self._initialize_helper(a_bzrdir, utf8_files, name, repository) def make_tags(self, branch): """See bzrlib.branch.BranchFormat.make_tags().""" return _mod_tag.BasicTags(branch) def supports_set_append_revisions_only(self): return True class BzrBranchFormat8(BranchFormatMetadir): """Metadir format supporting storing locations of subtree branches.""" def _branch_class(self): return BzrBranch8 @classmethod def get_format_string(cls): """See BranchFormat.get_format_string().""" return "Bazaar Branch Format 8 (needs bzr 1.15)\n" def get_format_description(self): """See BranchFormat.get_format_description().""" return "Branch format 8" def initialize(self, a_bzrdir, name=None, repository=None, append_revisions_only=None): """Create a branch of this format in a_bzrdir.""" utf8_files = [('last-revision', '0 null:\n'), ('branch.conf', self._get_initial_config(append_revisions_only)), ('tags', ''), ('references', '') ] return self._initialize_helper(a_bzrdir, utf8_files, name, repository) def make_tags(self, branch): """See bzrlib.branch.BranchFormat.make_tags().""" return _mod_tag.BasicTags(branch) def supports_set_append_revisions_only(self): return True def supports_stacking(self): return True supports_reference_locations = True class BzrBranchFormat7(BranchFormatMetadir): """Branch format with last-revision, tags, and a stacked location pointer. The stacked location pointer is passed down to the repository and requires a repository format with supports_external_lookups = True. This format was introduced in bzr 1.6. """ def initialize(self, a_bzrdir, name=None, repository=None, append_revisions_only=None): """Create a branch of this format in a_bzrdir.""" utf8_files = [('last-revision', '0 null:\n'), ('branch.conf', self._get_initial_config(append_revisions_only)), ('tags', ''), ] return self._initialize_helper(a_bzrdir, utf8_files, name, repository) def _branch_class(self): return BzrBranch7 @classmethod def get_format_string(cls): """See BranchFormat.get_format_string().""" return "Bazaar Branch Format 7 (needs bzr 1.6)\n" def get_format_description(self): """See BranchFormat.get_format_description().""" return "Branch format 7" def supports_set_append_revisions_only(self): return True def supports_stacking(self): return True def make_tags(self, branch): """See bzrlib.branch.BranchFormat.make_tags().""" return _mod_tag.BasicTags(branch) supports_reference_locations = False class BranchReferenceFormat(BranchFormatMetadir): """Bzr branch reference format. Branch references are used in implementing checkouts, they act as an alias to the real branch which is at some other url. 
This format has: - A location file - a format string """ @classmethod def get_format_string(cls): """See BranchFormat.get_format_string().""" return "Bazaar-NG Branch Reference Format 1\n" def get_format_description(self): """See BranchFormat.get_format_description().""" return "Checkout reference format 1" def get_reference(self, a_bzrdir, name=None): """See BranchFormat.get_reference().""" transport = a_bzrdir.get_branch_transport(None, name=name) return transport.get_bytes('location') def set_reference(self, a_bzrdir, name, to_branch): """See BranchFormat.set_reference().""" transport = a_bzrdir.get_branch_transport(None, name=name) location = transport.put_bytes('location', to_branch.base) def initialize(self, a_bzrdir, name=None, target_branch=None, repository=None, append_revisions_only=None): """Create a branch of this format in a_bzrdir.""" if target_branch is None: # this format does not implement branch itself, thus the implicit # creation contract must see it as uninitializable raise errors.UninitializableFormat(self) mutter('creating branch reference in %s', a_bzrdir.user_url) if a_bzrdir._format.fixed_components: raise errors.IncompatibleFormat(self, a_bzrdir._format) if name is None: name = a_bzrdir._get_selected_branch() branch_transport = a_bzrdir.get_branch_transport(self, name=name) branch_transport.put_bytes('location', target_branch.user_url) branch_transport.put_bytes('format', self.as_string()) branch = self.open(a_bzrdir, name, _found=True, possible_transports=[target_branch.bzrdir.root_transport]) self._run_post_branch_init_hooks(a_bzrdir, name, branch) return branch def _make_reference_clone_function(format, a_branch): """Create a clone() routine for a branch dynamically.""" def clone(to_bzrdir, revision_id=None, repository_policy=None): """See Branch.clone().""" return format.initialize(to_bzrdir, target_branch=a_branch) # cannot obey revision_id limits when cloning a reference ... # FIXME RBC 20060210 either nuke revision_id for clone, or # emit some sort of warning/error to the caller ?! return clone def open(self, a_bzrdir, name=None, _found=False, location=None, possible_transports=None, ignore_fallbacks=False, found_repository=None): """Return the branch that the branch reference in a_bzrdir points at. :param a_bzrdir: A BzrDir that contains a branch. :param name: Name of colocated branch to open, if any :param _found: a private parameter, do not use it. It is used to indicate if format probing has already be done. :param ignore_fallbacks: when set, no fallback branches will be opened (if there are any). Default is to open fallbacks. :param location: The location of the referenced branch. If unspecified, this will be determined from the branch reference in a_bzrdir. :param possible_transports: An optional reusable transports list. """ if name is None: name = a_bzrdir._get_selected_branch() if not _found: format = BranchFormatMetadir.find_format(a_bzrdir, name=name) if format.__class__ != self.__class__: raise AssertionError("wrong format %r found for %r" % (format, self)) if location is None: location = self.get_reference(a_bzrdir, name) real_bzrdir = controldir.ControlDir.open( location, possible_transports=possible_transports) result = real_bzrdir.open_branch(ignore_fallbacks=ignore_fallbacks, possible_transports=possible_transports) # this changes the behaviour of result.clone to create a new reference # rather than a copy of the content of the branch. 
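# Illustrative sketch, not part of bzrlib: a branch reference (as used by
# lightweight checkouts) is just a 'location' file, and get_reference()
# above returns its contents.  The path is hypothetical and is assumed to
# contain a lightweight checkout.
from bzrlib import branch as _branch_mod, controldir as _controldir_mod

def _sketch_reference_target(checkout_path):
    cdir = _controldir_mod.ControlDir.open(checkout_path)
    ref_format = _branch_mod.BranchReferenceFormat()
    # Returns the URL of the real branch the checkout points at.
    return ref_format.get_reference(cdir)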
# I did not use a proxy object because that needs much more extensive # testing, and we are only changing one behaviour at the moment. # If we decide to alter more behaviours - i.e. the implicit nickname # then this should be refactored to introduce a tested proxy branch # and a subclass of that for use in overriding clone() and .... # - RBC 20060210 result.clone = self._make_reference_clone_function(result) return result class BranchFormatRegistry(controldir.ControlComponentFormatRegistry): """Branch format registry.""" def __init__(self, other_registry=None): super(BranchFormatRegistry, self).__init__(other_registry) self._default_format = None def set_default(self, format): self._default_format = format def get_default(self): return self._default_format network_format_registry = registry.FormatRegistry() """Registry of formats indexed by their network name. The network name for a branch format is an identifier that can be used when referring to formats with smart server operations. See BranchFormat.network_name() for more detail. """ format_registry = BranchFormatRegistry(network_format_registry) # formats which have no format string are not discoverable # and not independently creatable, so are not registered. __format6 = BzrBranchFormat6() __format7 = BzrBranchFormat7() __format8 = BzrBranchFormat8() format_registry.register_lazy( "Bazaar-NG branch format 5\n", "bzrlib.branchfmt.fullhistory", "BzrBranchFormat5") format_registry.register(BranchReferenceFormat()) format_registry.register(__format6) format_registry.register(__format7) format_registry.register(__format8) format_registry.set_default(__format7) class BranchWriteLockResult(LogicalLockResult): """The result of write locking a branch. :ivar branch_token: The token obtained from the underlying branch lock, or None. :ivar unlock: A callable which will unlock the lock. """ def __init__(self, unlock, branch_token): LogicalLockResult.__init__(self, unlock) self.branch_token = branch_token def __repr__(self): return "BranchWriteLockResult(%s, %s)" % (self.branch_token, self.unlock) class BzrBranch(Branch, _RelockDebugMixin): """A branch stored in the actual filesystem. Note that it's "local" in the context of the filesystem; it doesn't really matter if it's on an nfs/smb/afs/coda/... share, as long as it's writable, and can be accessed via the normal filesystem API. :ivar _transport: Transport for file operations on this branch's control files, typically pointing to the .bzr/branch directory. :ivar repository: Repository for this branch. :ivar base: The url of the base directory for this branch; the one containing the .bzr directory. :ivar name: Optional colocated branch name as it exists in the control directory. 
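# Illustrative sketch, not part of bzrlib: querying the registry that was
# just populated above.
from bzrlib import branch as _branch_mod

def _sketch_default_format():
    fmt = _branch_mod.format_registry.get_default()
    # With the registrations above this is BzrBranchFormat7.
    return fmt.get_format_description(), fmt.get_format_string()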
""" def __init__(self, _format=None, _control_files=None, a_bzrdir=None, name=None, _repository=None, ignore_fallbacks=False, possible_transports=None): """Create new branch object at a particular location.""" if a_bzrdir is None: raise ValueError('a_bzrdir must be supplied') if name is None: raise ValueError('name must be supplied') self.bzrdir = a_bzrdir self._user_transport = self.bzrdir.transport.clone('..') if name != "": self._user_transport.set_segment_parameter( "branch", urlutils.escape(name)) self._base = self._user_transport.base self.name = name self._format = _format if _control_files is None: raise ValueError('BzrBranch _control_files is None') self.control_files = _control_files self._transport = _control_files._transport self.repository = _repository self.conf_store = None Branch.__init__(self, possible_transports) def __str__(self): return '%s(%s)' % (self.__class__.__name__, self.user_url) __repr__ = __str__ def _get_base(self): """Returns the directory containing the control directory.""" return self._base base = property(_get_base, doc="The URL for the root of this branch.") @property def user_transport(self): return self._user_transport def _get_config(self): return _mod_config.TransportConfig(self._transport, 'branch.conf') def _get_config_store(self): if self.conf_store is None: self.conf_store = _mod_config.BranchStore(self) return self.conf_store def _uncommitted_branch(self): """Return the branch that may contain uncommitted changes.""" master = self.get_master_branch() if master is not None: return master else: return self def store_uncommitted(self, creator): """Store uncommitted changes from a ShelfCreator. :param creator: The ShelfCreator containing uncommitted changes, or None to delete any stored changes. :raises: ChangesAlreadyStored if the branch already has changes. """ branch = self._uncommitted_branch() if creator is None: branch._transport.delete('stored-transform') return if branch._transport.has('stored-transform'): raise errors.ChangesAlreadyStored transform = StringIO() creator.write_shelf(transform) transform.seek(0) branch._transport.put_file('stored-transform', transform) def get_unshelver(self, tree): """Return a shelf.Unshelver for this branch and tree. :param tree: The tree to use to construct the Unshelver. :return: an Unshelver or None if no changes are stored. """ branch = self._uncommitted_branch() try: transform = branch._transport.get('stored-transform') except errors.NoSuchFile: return None return shelf.Unshelver.from_tree_and_shelf(tree, transform) def is_locked(self): return self.control_files.is_locked() def lock_write(self, token=None): """Lock the branch for write operations. :param token: A token to permit reacquiring a previously held and preserved lock. :return: A BranchWriteLockResult. """ if not self.is_locked(): self._note_lock('w') self.repository._warn_if_deprecated(self) self.repository.lock_write() took_lock = True else: took_lock = False try: return BranchWriteLockResult(self.unlock, self.control_files.lock_write(token=token)) except: if took_lock: self.repository.unlock() raise def lock_read(self): """Lock the branch for read operations. :return: A bzrlib.lock.LogicalLockResult. 
""" if not self.is_locked(): self._note_lock('r') self.repository._warn_if_deprecated(self) self.repository.lock_read() took_lock = True else: took_lock = False try: self.control_files.lock_read() return LogicalLockResult(self.unlock) except: if took_lock: self.repository.unlock() raise @only_raises(errors.LockNotHeld, errors.LockBroken) def unlock(self): if self.control_files._lock_count == 1 and self.conf_store is not None: self.conf_store.save_changes() try: self.control_files.unlock() finally: if not self.control_files.is_locked(): self.repository.unlock() # we just released the lock self._clear_cached_state() def peek_lock_mode(self): if self.control_files._lock_count == 0: return None else: return self.control_files._lock_mode def get_physical_lock_status(self): return self.control_files.get_physical_lock_status() @needs_read_lock def print_file(self, file, revision_id): """See Branch.print_file.""" return self.repository.print_file(file, revision_id) @needs_write_lock def set_last_revision_info(self, revno, revision_id): if not revision_id or not isinstance(revision_id, basestring): raise errors.InvalidRevisionId(revision_id=revision_id, branch=self) revision_id = _mod_revision.ensure_null(revision_id) old_revno, old_revid = self.last_revision_info() if self.get_append_revisions_only(): self._check_history_violation(revision_id) self._run_pre_change_branch_tip_hooks(revno, revision_id) self._write_last_revision_info(revno, revision_id) self._clear_cached_state() self._last_revision_info_cache = revno, revision_id self._run_post_change_branch_tip_hooks(old_revno, old_revid) def basis_tree(self): """See Branch.basis_tree.""" return self.repository.revision_tree(self.last_revision()) def _get_parent_location(self): _locs = ['parent', 'pull', 'x-pull'] for l in _locs: try: return self._transport.get_bytes(l).strip('\n') except errors.NoSuchFile: pass return None def get_stacked_on_url(self): raise errors.UnstackableBranchFormat(self._format, self.user_url) def set_push_location(self, location): """See Branch.set_push_location.""" self.get_config().set_user_option( 'push_location', location, store=_mod_config.STORE_LOCATION_NORECURSE) def _set_parent_location(self, url): if url is None: self._transport.delete('parent') else: self._transport.put_bytes('parent', url + '\n', mode=self.bzrdir._get_file_mode()) @needs_write_lock def unbind(self): """If bound, unbind""" return self.set_bound_location(None) @needs_write_lock def bind(self, other): """Bind this branch to the branch other. This does not push or pull data between the branches, though it does check for divergence to raise an error when the branches are not either the same, or one a prefix of the other. That behaviour may not be useful, so that check may be removed in future. :param other: The branch to bind to :type other: Branch """ # TODO: jam 20051230 Consider checking if the target is bound # It is debatable whether you should be able to bind to # a branch which is itself bound. # Committing is obviously forbidden, # but binding itself may not be. 
# Since we *have* to check at commit time, we don't # *need* to check here # we want to raise diverged if: # last_rev is not in the other_last_rev history, AND # other_last_rev is not in our history, and do it without pulling # history around self.set_bound_location(other.base) def get_bound_location(self): try: return self._transport.get_bytes('bound')[:-1] except errors.NoSuchFile: return None @needs_read_lock def get_master_branch(self, possible_transports=None): """Return the branch we are bound to. :return: Either a Branch, or None """ if self._master_branch_cache is None: self._master_branch_cache = self._get_master_branch( possible_transports) return self._master_branch_cache def _get_master_branch(self, possible_transports): bound_loc = self.get_bound_location() if not bound_loc: return None try: return Branch.open(bound_loc, possible_transports=possible_transports) except (errors.NotBranchError, errors.ConnectionError), e: raise errors.BoundBranchConnectionFailure( self, bound_loc, e) @needs_write_lock def set_bound_location(self, location): """Set the target where this branch is bound to. :param location: URL to the target branch """ self._master_branch_cache = None if location: self._transport.put_bytes('bound', location+'\n', mode=self.bzrdir._get_file_mode()) else: try: self._transport.delete('bound') except errors.NoSuchFile: return False return True @needs_write_lock def update(self, possible_transports=None): """Synchronise this branch with the master branch if any. :return: None or the last_revision that was pivoted out during the update. """ master = self.get_master_branch(possible_transports) if master is not None: old_tip = _mod_revision.ensure_null(self.last_revision()) self.pull(master, overwrite=True) if self.repository.get_graph().is_ancestor(old_tip, _mod_revision.ensure_null(self.last_revision())): return None return old_tip return None def _read_last_revision_info(self): revision_string = self._transport.get_bytes('last-revision') revno, revision_id = revision_string.rstrip('\n').split(' ', 1) revision_id = cache_utf8.get_cached_utf8(revision_id) revno = int(revno) return revno, revision_id def _write_last_revision_info(self, revno, revision_id): """Simply write out the revision id, with no checks. Use set_last_revision_info to perform this safely. Does not update the revision_history cache. """ revision_id = _mod_revision.ensure_null(revision_id) out_string = '%d %s\n' % (revno, revision_id) self._transport.put_bytes('last-revision', out_string, mode=self.bzrdir._get_file_mode()) @needs_write_lock def update_feature_flags(self, updated_flags): """Update the feature flags for this branch. :param updated_flags: Dictionary mapping feature names to necessities A necessity can be None to indicate the feature should be removed """ self._format._update_feature_flags(updated_flags) self.control_transport.put_bytes('format', self._format.as_string()) class BzrBranch8(BzrBranch): """A branch that stores tree-reference locations.""" def _open_hook(self, possible_transports=None): if self._ignore_fallbacks: return if possible_transports is None: possible_transports = [self.bzrdir.root_transport] try: url = self.get_stacked_on_url() except (errors.UnstackableRepositoryFormat, errors.NotStacked, errors.UnstackableBranchFormat): pass else: for hook in Branch.hooks['transform_fallback_location']: url = hook(self, url) if url is None: hook_name = Branch.hooks.get_hook_name(hook) raise AssertionError( "'transform_fallback_location' hook %s returned " "None, not a URL." 
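# Illustrative sketch, not part of bzrlib: binding a branch to a master,
# synchronising with update(), and unbinding again, using the methods
# above.  The URLs are hypothetical.
from bzrlib import branch as _branch_mod

def _sketch_bind_cycle(local_url, master_url):
    local = _branch_mod.Branch.open(local_url)
    master = _branch_mod.Branch.open(master_url)
    local.bind(master)
    # update() pulls from the master; it returns the old tip if that tip
    # was pivoted out of the mainline, otherwise None.
    pivoted = local.update()
    local.unbind()
    return pivoted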
% hook_name) self._activate_fallback_location(url, possible_transports=possible_transports) def __init__(self, *args, **kwargs): self._ignore_fallbacks = kwargs.get('ignore_fallbacks', False) super(BzrBranch8, self).__init__(*args, **kwargs) self._last_revision_info_cache = None self._reference_info = None def _clear_cached_state(self): super(BzrBranch8, self)._clear_cached_state() self._last_revision_info_cache = None self._reference_info = None def _check_history_violation(self, revision_id): current_revid = self.last_revision() last_revision = _mod_revision.ensure_null(current_revid) if _mod_revision.is_null(last_revision): return graph = self.repository.get_graph() for lh_ancestor in graph.iter_lefthand_ancestry(revision_id): if lh_ancestor == current_revid: return raise errors.AppendRevisionsOnlyViolation(self.user_url) def _gen_revision_history(self): """Generate the revision history from last revision """ last_revno, last_revision = self.last_revision_info() self._extend_partial_history(stop_index=last_revno-1) return list(reversed(self._partial_revision_history_cache)) @needs_write_lock def _set_parent_location(self, url): """Set the parent branch""" self._set_config_location('parent_location', url, make_relative=True) @needs_read_lock def _get_parent_location(self): """Set the parent branch""" return self._get_config_location('parent_location') @needs_write_lock def _set_all_reference_info(self, info_dict): """Replace all reference info stored in a branch. :param info_dict: A dict of {file_id: (tree_path, branch_location)} """ s = StringIO() writer = rio.RioWriter(s) for key, (tree_path, branch_location) in info_dict.iteritems(): stanza = rio.Stanza(file_id=key, tree_path=tree_path, branch_location=branch_location) writer.write_stanza(stanza) self._transport.put_bytes('references', s.getvalue()) self._reference_info = info_dict @needs_read_lock def _get_all_reference_info(self): """Return all the reference info stored in a branch. :return: A dict of {file_id: (tree_path, branch_location)} """ if self._reference_info is not None: return self._reference_info rio_file = self._transport.get('references') try: stanzas = rio.read_stanzas(rio_file) info_dict = dict((s['file_id'], (s['tree_path'], s['branch_location'])) for s in stanzas) finally: rio_file.close() self._reference_info = info_dict return info_dict def set_reference_info(self, file_id, tree_path, branch_location): """Set the branch location to use for a tree reference. :param file_id: The file-id of the tree reference. :param tree_path: The path of the tree reference in the tree. :param branch_location: The location of the branch to retrieve tree references from. """ info_dict = self._get_all_reference_info() info_dict[file_id] = (tree_path, branch_location) if None in (tree_path, branch_location): if tree_path is not None: raise ValueError('tree_path must be None when branch_location' ' is None.') if branch_location is not None: raise ValueError('branch_location must be None when tree_path' ' is None.') del info_dict[file_id] self._set_all_reference_info(info_dict) def get_reference_info(self, file_id): """Get the tree_path and branch_location for a tree reference. :return: a tuple of (tree_path, branch_location) """ return self._get_all_reference_info().get(file_id, (None, None)) def reference_parent(self, file_id, path, possible_transports=None): """Return the parent branch for a tree-reference file_id. 
:param file_id: The file_id of the tree reference :param path: The path of the file_id in the tree :return: A branch associated with the file_id """ branch_location = self.get_reference_info(file_id)[1] if branch_location is None: return Branch.reference_parent(self, file_id, path, possible_transports) branch_location = urlutils.join(self.user_url, branch_location) return Branch.open(branch_location, possible_transports=possible_transports) def set_push_location(self, location): """See Branch.set_push_location.""" self._set_config_location('push_location', location) def set_bound_location(self, location): """See Branch.set_bound_location.""" self._master_branch_cache = None result = None conf = self.get_config_stack() if location is None: if not conf.get('bound'): return False else: conf.set('bound', 'False') return True else: self._set_config_location('bound_location', location, config=conf) conf.set('bound', 'True') return True def _get_bound_location(self, bound): """Return the bound location in the config file. Return None if the bound parameter does not match""" conf = self.get_config_stack() if conf.get('bound') != bound: return None return self._get_config_location('bound_location', config=conf) def get_bound_location(self): """See Branch.get_bound_location.""" return self._get_bound_location(True) def get_old_bound_location(self): """See Branch.get_old_bound_location""" return self._get_bound_location(False) def get_stacked_on_url(self): # you can always ask for the URL; but you might not be able to use it # if the repo can't support stacking. ## self._check_stackable_repo() # stacked_on_location is only ever defined in branch.conf, so don't # waste effort reading the whole stack of config files. conf = _mod_config.BranchOnlyStack(self) stacked_url = self._get_config_location('stacked_on_location', config=conf) if stacked_url is None: raise errors.NotStacked(self) return stacked_url.encode('utf-8') @needs_read_lock def get_rev_id(self, revno, history=None): """Find the revision id of the specified revno.""" if revno == 0: return _mod_revision.NULL_REVISION last_revno, last_revision_id = self.last_revision_info() if revno <= 0 or revno > last_revno: raise errors.NoSuchRevision(self, revno) if history is not None: return history[revno - 1] index = last_revno - revno if len(self._partial_revision_history_cache) <= index: self._extend_partial_history(stop_index=index) if len(self._partial_revision_history_cache) > index: return self._partial_revision_history_cache[index] else: raise errors.NoSuchRevision(self, revno) @needs_read_lock def revision_id_to_revno(self, revision_id): """Given a revision id, return its revno""" if _mod_revision.is_null(revision_id): return 0 try: index = self._partial_revision_history_cache.index(revision_id) except ValueError: try: self._extend_partial_history(stop_revision=revision_id) except errors.RevisionNotPresent, e: raise errors.GhostRevisionsHaveNoRevno(revision_id, e.revision_id) index = len(self._partial_revision_history_cache) - 1 if index < 0: raise errors.NoSuchRevision(self, revision_id) if self._partial_revision_history_cache[index] != revision_id: raise errors.NoSuchRevision(self, revision_id) return self.revno() - index class BzrBranch7(BzrBranch8): """A branch with support for a fallback repository.""" def set_reference_info(self, file_id, tree_path, branch_location): Branch.set_reference_info(self, file_id, tree_path, branch_location) def get_reference_info(self, file_id): return Branch.get_reference_info(self, file_id) def reference_parent(self,
file_id, path, possible_transports=None): return Branch.reference_parent(self, file_id, path, possible_transports) class BzrBranch6(BzrBranch7): """See BzrBranchFormat6 for the capabilities of this branch. This subclass of BzrBranch7 disables the new features BzrBranch7 added, i.e. stacking. """ def get_stacked_on_url(self): raise errors.UnstackableBranchFormat(self._format, self.user_url) ###################################################################### # results of operations class _Result(object): def _show_tag_conficts(self, to_file): if not getattr(self, 'tag_conflicts', None): return to_file.write('Conflicting tags:\n') for name, value1, value2 in self.tag_conflicts: to_file.write(' %s\n' % (name, )) class PullResult(_Result): """Result of a Branch.pull operation. :ivar old_revno: Revision number before pull. :ivar new_revno: Revision number after pull. :ivar old_revid: Tip revision id before pull. :ivar new_revid: Tip revision id after pull. :ivar source_branch: Source (local) branch object. (read locked) :ivar master_branch: Master branch of the target, or the target if no Master :ivar local_branch: target branch if there is a Master, else None :ivar target_branch: Target/destination branch object. (write locked) :ivar tag_conflicts: A list of tag conflicts, see BasicTags.merge_to :ivar tag_updates: A dict with new tags, see BasicTags.merge_to """ def report(self, to_file): tag_conflicts = getattr(self, "tag_conflicts", None) tag_updates = getattr(self, "tag_updates", None) if not is_quiet(): if self.old_revid != self.new_revid: to_file.write('Now on revision %d.\n' % self.new_revno) if tag_updates: to_file.write('%d tag(s) updated.\n' % len(tag_updates)) if self.old_revid == self.new_revid and not tag_updates: if not tag_conflicts: to_file.write('No revisions or tags to pull.\n') else: to_file.write('No revisions to pull.\n') self._show_tag_conficts(to_file) class BranchPushResult(_Result): """Result of a Branch.push operation. :ivar old_revno: Revision number (eg 10) of the target before push. :ivar new_revno: Revision number (eg 12) of the target after push. :ivar old_revid: Tip revision id (eg joe@foo.com-1234234-aoeua34) of target before the push. :ivar new_revid: Tip revision id (eg joe@foo.com-5676566-boa234a) of target after the push. :ivar source_branch: Source branch object that the push was from. This is read locked, and generally is a local (and thus low latency) branch. :ivar master_branch: If target is a bound branch, the master branch of target, or target itself. Always write locked. :ivar target_branch: The direct Branch where data is being sent (write locked). :ivar local_branch: If the target is a bound branch this will be the target, otherwise it will be None. """ def report(self, to_file): # TODO: This function gets passed a to_file, but then # ignores it and calls note() instead. This is also # inconsistent with PullResult(), which writes to stdout. 
# -- JRV20110901, bug #838853 tag_conflicts = getattr(self, "tag_conflicts", None) tag_updates = getattr(self, "tag_updates", None) if not is_quiet(): if self.old_revid != self.new_revid: note(gettext('Pushed up to revision %d.') % self.new_revno) if tag_updates: note(ngettext('%d tag updated.', '%d tags updated.', len(tag_updates)) % len(tag_updates)) if self.old_revid == self.new_revid and not tag_updates: if not tag_conflicts: note(gettext('No new revisions or tags to push.')) else: note(gettext('No new revisions to push.')) self._show_tag_conficts(to_file) class BranchCheckResult(object): """Results of checking branch consistency. :see: Branch.check """ def __init__(self, branch): self.branch = branch self.errors = [] def report_results(self, verbose): """Report the check results via trace.note. :param verbose: Requests more detailed display of what was checked, if any. """ note(gettext('checked branch {0} format {1}').format( self.branch.user_url, self.branch._format)) for error in self.errors: note(gettext('found error:%s'), error) class Converter5to6(object): """Perform an in-place upgrade of format 5 to format 6""" def convert(self, branch): # Data for 5 and 6 can peacefully coexist. format = BzrBranchFormat6() new_branch = format.open(branch.bzrdir, _found=True) # Copy source data into target new_branch._write_last_revision_info(*branch.last_revision_info()) new_branch.lock_write() try: new_branch.set_parent(branch.get_parent()) new_branch.set_bound_location(branch.get_bound_location()) new_branch.set_push_location(branch.get_push_location()) finally: new_branch.unlock() # New branch has no tags by default new_branch.tags._set_tag_dict({}) # Copying done; now update target format new_branch._transport.put_bytes('format', format.as_string(), mode=new_branch.bzrdir._get_file_mode()) # Clean up old files new_branch._transport.delete('revision-history') branch.lock_write() try: try: branch.set_parent(None) except errors.NoSuchFile: pass branch.set_bound_location(None) finally: branch.unlock() class Converter6to7(object): """Perform an in-place upgrade of format 6 to format 7""" def convert(self, branch): format = BzrBranchFormat7() branch._set_config_location('stacked_on_location', '') # update target format branch._transport.put_bytes('format', format.as_string()) class Converter7to8(object): """Perform an in-place upgrade of format 7 to format 8""" def convert(self, branch): format = BzrBranchFormat8() branch._transport.put_bytes('references', '') # update target format branch._transport.put_bytes('format', format.as_string()) class InterBranch(InterObject): """This class represents operations taking place between two branches. Its instances have methods like pull() and push() and contain references to the source and target repositories these operations can be carried out on. """ _optimisers = [] """The available optimised InterBranch types.""" @classmethod def _get_branch_formats_to_test(klass): """Return an iterable of format tuples for testing. :return: An iterable of (from_format, to_format) to use when testing this InterBranch class. Each InterBranch class should define this method itself. """ raise NotImplementedError(klass._get_branch_formats_to_test) @needs_write_lock def pull(self, overwrite=False, stop_revision=None, possible_transports=None, local=False): """Mirror source into target branch. The target branch is considered to be 'local', having low latency. 
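For illustration only (a hypothetical call, not taken from this module): ``InterBranch.get(source_branch, target_branch).pull(overwrite=False)`` would select a compatible optimiser (for example GenericInterBranch below) and mirror ``source_branch`` into ``target_branch``; both names are assumed to be already-open Branch objects.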
:returns: PullResult instance """ raise NotImplementedError(self.pull) @needs_write_lock def push(self, overwrite=False, stop_revision=None, lossy=False, _override_hook_source_branch=None): """Mirror the source branch into the target branch. The source branch is considered to be 'local', having low latency. """ raise NotImplementedError(self.push) @needs_write_lock def copy_content_into(self, revision_id=None): """Copy the content of source into target revision_id: if not None, the revision history in the new branch will be truncated to end with revision_id. """ raise NotImplementedError(self.copy_content_into) @needs_write_lock def fetch(self, stop_revision=None, limit=None): """Fetch revisions. :param stop_revision: Last revision to fetch :param limit: Optional rough limit of revisions to fetch """ raise NotImplementedError(self.fetch) def _fix_overwrite_type(overwrite): if isinstance(overwrite, bool): if overwrite: return ["history", "tags"] else: return [] return overwrite class GenericInterBranch(InterBranch): """InterBranch implementation that uses public Branch functions.""" @classmethod def is_compatible(klass, source, target): # GenericBranch uses the public API, so always compatible return True @classmethod def _get_branch_formats_to_test(klass): return [(format_registry.get_default(), format_registry.get_default())] @classmethod def unwrap_format(klass, format): if isinstance(format, remote.RemoteBranchFormat): format._ensure_real() return format._custom_format return format @needs_write_lock def copy_content_into(self, revision_id=None): """Copy the content of source into target revision_id: if not None, the revision history in the new branch will be truncated to end with revision_id. """ self.source.update_references(self.target) self.source._synchronize_history(self.target, revision_id) try: parent = self.source.get_parent() except errors.InaccessibleParent, e: mutter('parent was not accessible to copy: %s', e) else: if parent: self.target.set_parent(parent) if self.source._push_should_merge_tags(): self.source.tags.merge_to(self.target.tags) @needs_write_lock def fetch(self, stop_revision=None, limit=None): if self.target.base == self.source.base: return (0, []) self.source.lock_read() try: fetch_spec_factory = fetch.FetchSpecFactory() fetch_spec_factory.source_branch = self.source fetch_spec_factory.source_branch_stop_revision_id = stop_revision fetch_spec_factory.source_repo = self.source.repository fetch_spec_factory.target_repo = self.target.repository fetch_spec_factory.target_repo_kind = fetch.TargetRepoKinds.PREEXISTING fetch_spec_factory.limit = limit fetch_spec = fetch_spec_factory.make_fetch_spec() return self.target.repository.fetch(self.source.repository, fetch_spec=fetch_spec) finally: self.source.unlock() @needs_write_lock def _update_revisions(self, stop_revision=None, overwrite=False, graph=None): other_revno, other_last_revision = self.source.last_revision_info() stop_revno = None # unknown if stop_revision is None: stop_revision = other_last_revision if _mod_revision.is_null(stop_revision): # if there are no commits, we're done. return stop_revno = other_revno # what's the current last revision, before we fetch [and change it # possibly] last_rev = _mod_revision.ensure_null(self.target.last_revision()) # we fetch here so that we don't process data twice in the common # case of having something to pull, and so that the check for # already merged can operate on the just fetched graph, which will # be cached in memory. 
self.fetch(stop_revision=stop_revision) # Check to see if one is an ancestor of the other if not overwrite: if graph is None: graph = self.target.repository.get_graph() if self.target._check_if_descendant_or_diverged( stop_revision, last_rev, graph, self.source): # stop_revision is a descendant of last_rev, but we aren't # overwriting, so we're done. return if stop_revno is None: if graph is None: graph = self.target.repository.get_graph() this_revno, this_last_revision = \ self.target.last_revision_info() stop_revno = graph.find_distance_to_null(stop_revision, [(other_last_revision, other_revno), (this_last_revision, this_revno)]) self.target.set_last_revision_info(stop_revno, stop_revision) @needs_write_lock def pull(self, overwrite=False, stop_revision=None, possible_transports=None, run_hooks=True, _override_hook_target=None, local=False): """Pull from source into self, updating my master if any. :param run_hooks: Private parameter - if false, this branch is being called because it's the master of the primary branch, so it should not run its hooks. """ bound_location = self.target.get_bound_location() if local and not bound_location: raise errors.LocalRequiresBoundBranch() master_branch = None source_is_master = False if bound_location: # bound_location comes from a config file, some care has to be # taken to relate it to source.user_url normalized = urlutils.normalize_url(bound_location) try: relpath = self.source.user_transport.relpath(normalized) source_is_master = (relpath == '') except (errors.PathNotChild, errors.InvalidURL): source_is_master = False if not local and bound_location and not source_is_master: # not pulling from master, so we need to update master. master_branch = self.target.get_master_branch(possible_transports) master_branch.lock_write() try: if master_branch: # pull from source into master. master_branch.pull(self.source, overwrite, stop_revision, run_hooks=False) return self._pull(overwrite, stop_revision, _hook_master=master_branch, run_hooks=run_hooks, _override_hook_target=_override_hook_target, merge_tags_to_master=not source_is_master) finally: if master_branch: master_branch.unlock() def push(self, overwrite=False, stop_revision=None, lossy=False, _override_hook_source_branch=None): """See InterBranch.push. This is the basic concrete implementation of push() :param _override_hook_source_branch: If specified, run the hooks passing this Branch as the source, rather than self. This is for use of RemoteBranch, where push is delegated to the underlying vfs-based Branch. """ if lossy: raise errors.LossyPushToSameVCS(self.source, self.target) # TODO: Public option to disable running hooks - should be trivial but # needs tests. op = cleanup.OperationWithCleanups(self._push_with_bound_branches) op.add_cleanup(self.source.lock_read().unlock) op.add_cleanup(self.target.lock_write().unlock) return op.run(overwrite, stop_revision, _override_hook_source_branch=_override_hook_source_branch) def _basic_push(self, overwrite, stop_revision): """Basic implementation of push without bound branches or hooks. Must be called with source read locked and target write locked. """ result = BranchPushResult() result.source_branch = self.source result.target_branch = self.target result.old_revno, result.old_revid = self.target.last_revision_info() self.source.update_references(self.target) overwrite = _fix_overwrite_type(overwrite) if result.old_revid != stop_revision: # We assume that during 'push' this repository is closer than # the target. 
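# (Illustrative note added for clarity, not original bzrlib commentary:
# _fix_overwrite_type() above has already normalised 'overwrite' at this
# point. True becomes ["history", "tags"], False becomes [], and an explicit
# list is passed through unchanged, so the membership tests below enable
# history overwriting and tag overwriting independently.)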
graph = self.source.repository.get_graph(self.target.repository) self._update_revisions(stop_revision, overwrite=("history" in overwrite), graph=graph) if self.source._push_should_merge_tags(): result.tag_updates, result.tag_conflicts = ( self.source.tags.merge_to( self.target.tags, "tags" in overwrite)) result.new_revno, result.new_revid = self.target.last_revision_info() return result def _push_with_bound_branches(self, operation, overwrite, stop_revision, _override_hook_source_branch=None): """Push from source into target, and into target's master if any. """ def _run_hooks(): if _override_hook_source_branch: result.source_branch = _override_hook_source_branch for hook in Branch.hooks['post_push']: hook(result) bound_location = self.target.get_bound_location() if bound_location and self.target.base != bound_location: # there is a master branch. # # XXX: Why the second check? Is it even supported for a branch to # be bound to itself? -- mbp 20070507 master_branch = self.target.get_master_branch() master_branch.lock_write() operation.add_cleanup(master_branch.unlock) # push into the master from the source branch. master_inter = InterBranch.get(self.source, master_branch) master_inter._basic_push(overwrite, stop_revision) # and push into the target branch from the source. Note that # we push from the source branch again, because it's considered # the highest bandwidth repository. result = self._basic_push(overwrite, stop_revision) result.master_branch = master_branch result.local_branch = self.target else: master_branch = None # no master branch result = self._basic_push(overwrite, stop_revision) # TODO: Why set master_branch and local_branch if there's no # binding? Maybe cleaner to just leave them unset? -- mbp # 20070504 result.master_branch = self.target result.local_branch = None _run_hooks() return result def _pull(self, overwrite=False, stop_revision=None, possible_transports=None, _hook_master=None, run_hooks=True, _override_hook_target=None, local=False, merge_tags_to_master=True): """See Branch.pull. This function is the core worker, used by GenericInterBranch.pull to avoid duplication when pulling source->master and source->local. :param _hook_master: Private parameter - set the branch to be supplied as the master to pull hooks. :param run_hooks: Private parameter - if false, this branch is being called because it's the master of the primary branch, so it should not run its hooks. :param _override_hook_target: Private parameter - set the branch to be supplied as the target_branch to pull hooks. :param local: Only update the local branch, and not the bound branch. """ # This type of branch can't be bound. if local: raise errors.LocalRequiresBoundBranch() result = PullResult() result.source_branch = self.source if _override_hook_target is None: result.target_branch = self.target else: result.target_branch = _override_hook_target self.source.lock_read() try: # We assume that during 'pull' the target repository is closer than # the source one. self.source.update_references(self.target) graph = self.target.repository.get_graph(self.source.repository) # TODO: Branch formats should have a flag that indicates # that revno's are expensive, and pull() should honor that flag.
# -- JRV20090506 result.old_revno, result.old_revid = \ self.target.last_revision_info() overwrite = _fix_overwrite_type(overwrite) self._update_revisions(stop_revision, overwrite=("history" in overwrite), graph=graph) # TODO: The old revid should be specified when merging tags, # so a tags implementation that versions tags can only # pull in the most recent changes. -- JRV20090506 result.tag_updates, result.tag_conflicts = ( self.source.tags.merge_to(self.target.tags, "tags" in overwrite, ignore_master=not merge_tags_to_master)) result.new_revno, result.new_revid = self.target.last_revision_info() if _hook_master: result.master_branch = _hook_master result.local_branch = result.target_branch else: result.master_branch = result.target_branch result.local_branch = None if run_hooks: for hook in Branch.hooks['post_pull']: hook(result) finally: self.source.unlock() return result InterBranch.register_optimiser(GenericInterBranch) bzr-2.7.0/bzrlib/branchbuilder.py0000644000000000000000000003165611673635356015137 0ustar 00000000000000# Copyright (C) 2007, 2008, 2009 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Utility for create branches with particular contents.""" from __future__ import absolute_import from bzrlib import ( controldir, commit, errors, memorytree, revision, ) class BranchBuilder(object): r"""A BranchBuilder aids creating Branches with particular shapes. The expected way to use BranchBuilder is to construct a BranchBuilder on the transport you want your branch on, and then call appropriate build_ methods on it to get the shape of history you want. This is meant as a helper for the test suite, not as a general class for real data. For instance: >>> from bzrlib.transport.memory import MemoryTransport >>> builder = BranchBuilder(MemoryTransport("memory:///")) >>> builder.start_series() >>> builder.build_snapshot('rev-id', None, [ ... ('add', ('', 'root-id', 'directory', '')), ... ('add', ('filename', 'f-id', 'file', 'content\n'))]) 'rev-id' >>> builder.build_snapshot('rev2-id', ['rev-id'], ... [('modify', ('f-id', 'new-content\n'))]) 'rev2-id' >>> builder.finish_series() >>> branch = builder.get_branch() :ivar _tree: This is a private member which is not meant to be modified by users of this class. While a 'series' is in progress, it should hold a MemoryTree with the contents of the last commit (ready to be modified by the next build_snapshot command) with a held write lock. Outside of a series in progress, it should be None. """ def __init__(self, transport=None, format=None, branch=None): """Construct a BranchBuilder on transport. :param transport: The transport the branch should be created on. If the path of the transport does not exist but its parent does it will be created. :param format: Either a BzrDirFormat, or the name of a format in the controldir format registry for the branch to be built. 
:param branch: An already constructed branch to use. This param is mutually exclusive with the transport and format params. """ if branch is not None: if format is not None: raise AssertionError( "branch and format kwargs are mutually exclusive") if transport is not None: raise AssertionError( "branch and transport kwargs are mutually exclusive") self._branch = branch else: if not transport.has('.'): transport.mkdir('.') if format is None: format = 'default' if isinstance(format, str): format = controldir.format_registry.make_bzrdir(format) self._branch = controldir.ControlDir.create_branch_convenience( transport.base, format=format, force_new_tree=False) self._tree = None def build_commit(self, parent_ids=None, allow_leftmost_as_ghost=False, **commit_kwargs): """Build a commit on the branch. This makes a commit with no real file content for when you only want to look at the revision graph structure. :param commit_kwargs: Arguments to pass through to commit, such as timestamp. """ if parent_ids is not None: if len(parent_ids) == 0: base_id = revision.NULL_REVISION else: base_id = parent_ids[0] if base_id != self._branch.last_revision(): self._move_branch_pointer(base_id, allow_leftmost_as_ghost=allow_leftmost_as_ghost) tree = memorytree.MemoryTree.create_on_branch(self._branch) tree.lock_write() try: if parent_ids is not None: tree.set_parent_ids(parent_ids, allow_leftmost_as_ghost=allow_leftmost_as_ghost) tree.add('') return self._do_commit(tree, **commit_kwargs) finally: tree.unlock() def _do_commit(self, tree, message=None, message_callback=None, **kwargs): reporter = commit.NullCommitReporter() if message is None and message_callback is None: message = u'commit %d' % (self._branch.revno() + 1,) return tree.commit(message, message_callback=message_callback, reporter=reporter, **kwargs) def _move_branch_pointer(self, new_revision_id, allow_leftmost_as_ghost=False): """Point self._branch to a different revision id.""" self._branch.lock_write() try: # We don't seem to have a simple set_last_revision(), so we # implement it here. cur_revno, cur_revision_id = self._branch.last_revision_info() try: g = self._branch.repository.get_graph() new_revno = g.find_distance_to_null(new_revision_id, [(cur_revision_id, cur_revno)]) self._branch.set_last_revision_info(new_revno, new_revision_id) except errors.GhostRevisionsHaveNoRevno: if not allow_leftmost_as_ghost: raise new_revno = 1 finally: self._branch.unlock() if self._tree is not None: # We are currently processing a series, but when switching branch # pointers, it is easiest to just create a new memory tree. # That way we are sure to have the right files-on-disk # We are cheating a little bit here, and locking the new tree # before the old tree is unlocked. But that way the branch stays # locked throughout. new_tree = memorytree.MemoryTree.create_on_branch(self._branch) new_tree.lock_write() self._tree.unlock() self._tree = new_tree def start_series(self): """We will be creating a series of commits. This allows us to hold open the locks while we are processing. Make sure to call 'finish_series' when you are done. 
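A minimal sketch of the intended call pattern, mirroring the doctest in the class docstring (revision ids here are hypothetical): call start_series(), then one or more build_snapshot('rev-id', parents, actions) calls, then finish_series() to release the write lock held on the internal MemoryTree.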
""" if self._tree is not None: raise AssertionError('You cannot start a new series while a' ' series is already going.') self._tree = memorytree.MemoryTree.create_on_branch(self._branch) self._tree.lock_write() def finish_series(self): """Call this after start_series to unlock the various objects.""" self._tree.unlock() self._tree = None def build_snapshot(self, revision_id, parent_ids, actions, message=None, timestamp=None, allow_leftmost_as_ghost=False, committer=None, timezone=None, message_callback=None): """Build a commit, shaped in a specific way. Most of the actions are self-explanatory. 'flush' is special action to break a series of actions into discrete steps so that complex changes (such as unversioning a file-id and re-adding it with a different kind) can be expressed in a way that will clearly work. :param revision_id: The handle for the new commit, can be None :param parent_ids: A list of parent_ids to use for the commit. It can be None, which indicates to use the last commit. :param actions: A list of actions to perform. Supported actions are: ('add', ('path', 'file-id', 'kind', 'content' or None)) ('modify', ('file-id', 'new-content')) ('unversion', 'file-id') ('rename', ('orig-path', 'new-path')) ('flush', None) :param message: An optional commit message, if not supplied, a default commit message will be written. :param message_callback: A message callback to use for the commit, as per mutabletree.commit. :param timestamp: If non-None, set the timestamp of the commit to this value. :param timezone: An optional timezone for timestamp. :param committer: An optional username to use for commit :param allow_leftmost_as_ghost: True if the leftmost parent should be permitted to be a ghost. :return: The revision_id of the new commit """ if parent_ids is not None: if len(parent_ids) == 0: base_id = revision.NULL_REVISION else: base_id = parent_ids[0] if base_id != self._branch.last_revision(): self._move_branch_pointer(base_id, allow_leftmost_as_ghost=allow_leftmost_as_ghost) if self._tree is not None: tree = self._tree else: tree = memorytree.MemoryTree.create_on_branch(self._branch) tree.lock_write() try: if parent_ids is not None: tree.set_parent_ids(parent_ids, allow_leftmost_as_ghost=allow_leftmost_as_ghost) # Unfortunately, MemoryTree.add(directory) just creates an # inventory entry. And the only public function to create a # directory is MemoryTree.mkdir() which creates the directory, but # also always adds it. So we have to use a multi-pass setup. 
pending = _PendingActions() for action, info in actions: if action == 'add': path, file_id, kind, content = info if kind == 'directory': pending.to_add_directories.append((path, file_id)) else: pending.to_add_files.append(path) pending.to_add_file_ids.append(file_id) pending.to_add_kinds.append(kind) if content is not None: pending.new_contents[file_id] = content elif action == 'modify': file_id, content = info pending.new_contents[file_id] = content elif action == 'unversion': pending.to_unversion_ids.add(info) elif action == 'rename': from_relpath, to_relpath = info pending.to_rename.append((from_relpath, to_relpath)) elif action == 'flush': self._flush_pending(tree, pending) pending = _PendingActions() else: raise ValueError('Unknown build action: "%s"' % (action,)) self._flush_pending(tree, pending) return self._do_commit(tree, message=message, rev_id=revision_id, timestamp=timestamp, timezone=timezone, committer=committer, message_callback=message_callback) finally: tree.unlock() def _flush_pending(self, tree, pending): """Flush the pending actions in 'pending', i.e. apply them to 'tree'.""" for path, file_id in pending.to_add_directories: if path == '': old_id = tree.path2id(path) if old_id is not None and old_id in pending.to_unversion_ids: # We're overwriting this path, no need to unversion pending.to_unversion_ids.discard(old_id) # Special case, because the path already exists tree.add([path], [file_id], ['directory']) else: tree.mkdir(path, file_id) for from_relpath, to_relpath in pending.to_rename: tree.rename_one(from_relpath, to_relpath) if pending.to_unversion_ids: tree.unversion(pending.to_unversion_ids) tree.add(pending.to_add_files, pending.to_add_file_ids, pending.to_add_kinds) for file_id, content in pending.new_contents.iteritems(): tree.put_file_bytes_non_atomic(file_id, content) def get_branch(self): """Return the branch created by the builder.""" return self._branch class _PendingActions(object): """Pending actions for build_snapshot to take. This is just a simple class to hold a bunch of the intermediate state of build_snapshot in single object. """ def __init__(self): self.to_add_directories = [] self.to_add_files = [] self.to_add_file_ids = [] self.to_add_kinds = [] self.new_contents = {} self.to_unversion_ids = set() self.to_rename = [] bzr-2.7.0/bzrlib/branchfmt/0000755000000000000000000000000011735374147013707 5ustar 00000000000000bzr-2.7.0/bzrlib/breakin.py0000644000000000000000000000557311673360271013734 0ustar 00000000000000# Copyright (C) 2007, 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import import os import signal _breakin_signal_number = None _breakin_signal_name = None def _debug(signal_number, interrupted_frame): import pdb import sys sys.stderr.write("** %s received, entering debugger\n" "** Type 'c' to continue or 'q' to stop the process\n" "** Or %s again to quit (and possibly dump core)\n" % (_breakin_signal_name, _breakin_signal_name)) # It seems that on Windows, when sys.stderr is to a PIPE, then we need to # flush. Not sure why it is buffered, but that seems to be the case. sys.stderr.flush() # restore default meaning so that you can kill the process by hitting it # twice signal.signal(_breakin_signal_number, signal.SIG_DFL) try: pdb.set_trace() finally: signal.signal(_breakin_signal_number, _debug) def determine_signal(): global _breakin_signal_number global _breakin_signal_name if _breakin_signal_number is not None: return _breakin_signal_number # Note: As near as I can tell, Windows is the only one to define SIGBREAK, # and other platforms defined SIGQUIT. There doesn't seem to be a # platform that defines both. # -- jam 2009-07-30 sigquit = getattr(signal, 'SIGQUIT', None) sigbreak = getattr(signal, 'SIGBREAK', None) if sigquit is not None: _breakin_signal_number = sigquit _breakin_signal_name = 'SIGQUIT' elif sigbreak is not None: _breakin_signal_number = sigbreak _breakin_signal_name = 'SIGBREAK' return _breakin_signal_number def hook_debugger_to_signal(): """Add a signal handler so we drop into the debugger. On Unix, this is hooked into SIGQUIT (C-\\), and on Windows, this is hooked into SIGBREAK (C-Pause). """ # when sigquit (C-\) or sigbreak (C-Pause) is received go into pdb if os.environ.get('BZR_SIGQUIT_PDB', '1') == '0': # User explicitly requested we don't support this return sig = determine_signal() if sig is None: return # print 'hooking into %s' % (_breakin_signal_name,) signal.signal(sig, _debug) bzr-2.7.0/bzrlib/btree_index.py0000644000000000000000000020762212146160721014602 0ustar 00000000000000# Copyright (C) 2008-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # """B+Tree indices""" from __future__ import absolute_import import cStringIO from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ import bisect import math import tempfile import zlib """) from bzrlib import ( chunk_writer, debug, errors, fifo_cache, index, lru_cache, osutils, static_tuple, trace, transport, ) from bzrlib.index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN _BTSIGNATURE = "B+Tree Graph Index 2\n" _OPTION_ROW_LENGTHS = "row_lengths=" _LEAF_FLAG = "type=leaf\n" _INTERNAL_FLAG = "type=internal\n" _INTERNAL_OFFSET = "offset=" _RESERVED_HEADER_BYTES = 120 _PAGE_SIZE = 4096 # 4K per page: 4MB - 1000 entries _NODE_CACHE_SIZE = 1000 class _BuilderRow(object): """The stored state accumulated while writing out a row in the index. :ivar spool: A temporary file used to accumulate nodes for this row in the tree. :ivar nodes: The count of nodes emitted so far. """ def __init__(self): """Create a _BuilderRow.""" self.nodes = 0 self.spool = None# tempfile.TemporaryFile(prefix='bzr-index-row-') self.writer = None def finish_node(self, pad=True): byte_lines, _, padding = self.writer.finish() if self.nodes == 0: self.spool = cStringIO.StringIO() # padded note: self.spool.write("\x00" * _RESERVED_HEADER_BYTES) elif self.nodes == 1: # We got bigger than 1 node, switch to a temp file spool = tempfile.TemporaryFile(prefix='bzr-index-row-') spool.write(self.spool.getvalue()) self.spool = spool skipped_bytes = 0 if not pad and padding: del byte_lines[-1] skipped_bytes = padding self.spool.writelines(byte_lines) remainder = (self.spool.tell() + skipped_bytes) % _PAGE_SIZE if remainder != 0: raise AssertionError("incorrect node length: %d, %d" % (self.spool.tell(), remainder)) self.nodes += 1 self.writer = None class _InternalBuilderRow(_BuilderRow): """The stored state accumulated while writing out internal rows.""" def finish_node(self, pad=True): if not pad: raise AssertionError("Must pad internal nodes only.") _BuilderRow.finish_node(self) class _LeafBuilderRow(_BuilderRow): """The stored state accumulated while writing out a leaf rows.""" class BTreeBuilder(index.GraphIndexBuilder): """A Builder for B+Tree based Graph indices. The resulting graph has the structure: _SIGNATURE OPTIONS NODES _SIGNATURE := 'B+Tree Graph Index 1' NEWLINE OPTIONS := REF_LISTS KEY_ELEMENTS LENGTH REF_LISTS := 'node_ref_lists=' DIGITS NEWLINE KEY_ELEMENTS := 'key_elements=' DIGITS NEWLINE LENGTH := 'len=' DIGITS NEWLINE ROW_LENGTHS := 'row_lengths' DIGITS (COMMA DIGITS)* NODES := NODE_COMPRESSED* NODE_COMPRESSED:= COMPRESSED_BYTES{4096} NODE_RAW := INTERNAL | LEAF INTERNAL := INTERNAL_FLAG POINTERS LEAF := LEAF_FLAG ROWS KEY_ELEMENT := Not-whitespace-utf8 KEY := KEY_ELEMENT (NULL KEY_ELEMENT)* ROWS := ROW* ROW := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE ABSENT := 'a' REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1} REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)? REFERENCE := KEY VALUE := no-newline-no-null-bytes """ def __init__(self, reference_lists=0, key_elements=1, spill_at=100000): """See GraphIndexBuilder.__init__. :param spill_at: Optional parameter controlling the maximum number of nodes that BTreeBuilder will hold in memory. 
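For instance (an illustrative configuration, not a recommendation): BTreeBuilder(reference_lists=1, key_elements=1, spill_at=100000) keeps up to 100,000 added nodes in memory, after which add_node() spills them to a backing index on disk via _spill_mem_keys_to_disk().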
""" index.GraphIndexBuilder.__init__(self, reference_lists=reference_lists, key_elements=key_elements) self._spill_at = spill_at self._backing_indices = [] # A map of {key: (node_refs, value)} self._nodes = {} # Indicate it hasn't been built yet self._nodes_by_key = None self._optimize_for_size = False def add_node(self, key, value, references=()): """Add a node to the index. If adding the node causes the builder to reach its spill_at threshold, disk spilling will be triggered. :param key: The key. keys are non-empty tuples containing as many whitespace-free utf8 bytestrings as the key length defined for this index. :param references: An iterable of iterables of keys. Each is a reference to another key. :param value: The value to associate with the key. It may be any bytes as long as it does not contain \\0 or \\n. """ # Ensure that 'key' is a StaticTuple key = static_tuple.StaticTuple.from_sequence(key).intern() # we don't care about absent_references node_refs, _ = self._check_key_ref_value(key, references, value) if key in self._nodes: raise errors.BadIndexDuplicateKey(key, self) self._nodes[key] = static_tuple.StaticTuple(node_refs, value) if self._nodes_by_key is not None and self._key_length > 1: self._update_nodes_by_key(key, value, node_refs) if len(self._nodes) < self._spill_at: return self._spill_mem_keys_to_disk() def _spill_mem_keys_to_disk(self): """Write the in memory keys down to disk to cap memory consumption. If we already have some keys written to disk, we will combine them so as to preserve the sorted order. The algorithm for combining uses powers of two. So on the first spill, write all mem nodes into a single index. On the second spill, combine the mem nodes with the nodes on disk to create a 2x sized disk index and get rid of the first index. On the third spill, create a single new disk index, which will contain the mem nodes, and preserve the existing 2x sized index. On the fourth, combine mem with the first and second indexes, creating a new one of size 4x. On the fifth create a single new one, etc. """ if self._combine_backing_indices: (new_backing_file, size, backing_pos) = self._spill_mem_keys_and_combine() else: new_backing_file, size = self._spill_mem_keys_without_combining() # Note: The transport here isn't strictly needed, because we will use # direct access to the new_backing._file object new_backing = BTreeGraphIndex(transport.get_transport_from_path('.'), '', size) # GC will clean up the file new_backing._file = new_backing_file if self._combine_backing_indices: if len(self._backing_indices) == backing_pos: self._backing_indices.append(None) self._backing_indices[backing_pos] = new_backing for backing_pos in range(backing_pos): self._backing_indices[backing_pos] = None else: self._backing_indices.append(new_backing) self._nodes = {} self._nodes_by_key = None def _spill_mem_keys_without_combining(self): return self._write_nodes(self._iter_mem_nodes(), allow_optimize=False) def _spill_mem_keys_and_combine(self): iterators_to_combine = [self._iter_mem_nodes()] pos = -1 for pos, backing in enumerate(self._backing_indices): if backing is None: pos -= 1 break iterators_to_combine.append(backing.iter_all_entries()) backing_pos = pos + 1 new_backing_file, size = \ self._write_nodes(self._iter_smallest(iterators_to_combine), allow_optimize=False) return new_backing_file, size, backing_pos def add_nodes(self, nodes): """Add nodes to the index. :param nodes: An iterable of (key, node_refs, value) entries to add. 
""" if self.reference_lists: for (key, value, node_refs) in nodes: self.add_node(key, value, node_refs) else: for (key, value) in nodes: self.add_node(key, value) def _iter_mem_nodes(self): """Iterate over the nodes held in memory.""" nodes = self._nodes if self.reference_lists: for key in sorted(nodes): references, value = nodes[key] yield self, key, value, references else: for key in sorted(nodes): references, value = nodes[key] yield self, key, value def _iter_smallest(self, iterators_to_combine): if len(iterators_to_combine) == 1: for value in iterators_to_combine[0]: yield value return current_values = [] for iterator in iterators_to_combine: try: current_values.append(iterator.next()) except StopIteration: current_values.append(None) last = None while True: # Decorate candidates with the value to allow 2.4's min to be used. candidates = [(item[1][1], item) for item in enumerate(current_values) if item[1] is not None] if not len(candidates): return selected = min(candidates) # undecorate back to (pos, node) selected = selected[1] if last == selected[1][1]: raise errors.BadIndexDuplicateKey(last, self) last = selected[1][1] # Yield, with self as the index yield (self,) + selected[1][1:] pos = selected[0] try: current_values[pos] = iterators_to_combine[pos].next() except StopIteration: current_values[pos] = None def _add_key(self, string_key, line, rows, allow_optimize=True): """Add a key to the current chunk. :param string_key: The key to add. :param line: The fully serialised key and value. :param allow_optimize: If set to False, prevent setting the optimize flag when writing out. This is used by the _spill_mem_keys_to_disk functionality. """ new_leaf = False if rows[-1].writer is None: # opening a new leaf chunk; new_leaf = True for pos, internal_row in enumerate(rows[:-1]): # flesh out any internal nodes that are needed to # preserve the height of the tree if internal_row.writer is None: length = _PAGE_SIZE if internal_row.nodes == 0: length -= _RESERVED_HEADER_BYTES # padded if allow_optimize: optimize_for_size = self._optimize_for_size else: optimize_for_size = False internal_row.writer = chunk_writer.ChunkWriter(length, 0, optimize_for_size=optimize_for_size) internal_row.writer.write(_INTERNAL_FLAG) internal_row.writer.write(_INTERNAL_OFFSET + str(rows[pos + 1].nodes) + "\n") # add a new leaf length = _PAGE_SIZE if rows[-1].nodes == 0: length -= _RESERVED_HEADER_BYTES # padded rows[-1].writer = chunk_writer.ChunkWriter(length, optimize_for_size=self._optimize_for_size) rows[-1].writer.write(_LEAF_FLAG) if rows[-1].writer.write(line): # if we failed to write, despite having an empty page to write to, # then line is too big. raising the error avoids infinite recursion # searching for a suitably large page that will not be found. if new_leaf: raise errors.BadIndexKey(string_key) # this key did not fit in the node: rows[-1].finish_node() key_line = string_key + "\n" new_row = True for row in reversed(rows[:-1]): # Mark the start of the next node in the node above. If it # doesn't fit then propagate upwards until we find one that # it does fit into. if row.writer.write(key_line): row.finish_node() else: # We've found a node that can handle the pointer. 
new_row = False break # If we reached the current root without being able to mark the # division point, then we need a new root: if new_row: # We need a new row if 'index' in debug.debug_flags: trace.mutter('Inserting new global row.') new_row = _InternalBuilderRow() reserved_bytes = 0 rows.insert(0, new_row) # This will be padded, hence the -100 new_row.writer = chunk_writer.ChunkWriter( _PAGE_SIZE - _RESERVED_HEADER_BYTES, reserved_bytes, optimize_for_size=self._optimize_for_size) new_row.writer.write(_INTERNAL_FLAG) new_row.writer.write(_INTERNAL_OFFSET + str(rows[1].nodes - 1) + "\n") new_row.writer.write(key_line) self._add_key(string_key, line, rows, allow_optimize=allow_optimize) def _write_nodes(self, node_iterator, allow_optimize=True): """Write node_iterator out as a B+Tree. :param node_iterator: An iterator of sorted nodes. Each node should match the output given by iter_all_entries. :param allow_optimize: If set to False, prevent setting the optimize flag when writing out. This is used by the _spill_mem_keys_to_disk functionality. :return: A file handle for a temporary file containing a B+Tree for the nodes. """ # The index rows - rows[0] is the root, rows[1] is the layer under it # etc. rows = [] # forward sorted by key. In future we may consider topological sorting, # at the cost of table scans for direct lookup, or a second index for # direct lookup key_count = 0 # A stack with the number of nodes of each size. 0 is the root node # and must always be 1 (if there are any nodes in the tree). self.row_lengths = [] # Loop over all nodes adding them to the bottom row # (rows[-1]). When we finish a chunk in a row, # propagate the key that didn't fit (comes after the chunk) to the # row above, transitively. for node in node_iterator: if key_count == 0: # First key triggers the first row rows.append(_LeafBuilderRow()) key_count += 1 string_key, line = _btree_serializer._flatten_node(node, self.reference_lists) self._add_key(string_key, line, rows, allow_optimize=allow_optimize) for row in reversed(rows): pad = (type(row) != _LeafBuilderRow) row.finish_node(pad=pad) lines = [_BTSIGNATURE] lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n') lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n') lines.append(_OPTION_LEN + str(key_count) + '\n') row_lengths = [row.nodes for row in rows] lines.append(_OPTION_ROW_LENGTHS + ','.join(map(str, row_lengths)) + '\n') if row_lengths and row_lengths[-1] > 1: result = tempfile.NamedTemporaryFile(prefix='bzr-index-') else: result = cStringIO.StringIO() result.writelines(lines) position = sum(map(len, lines)) root_row = True if position > _RESERVED_HEADER_BYTES: raise AssertionError("Could not fit the header in the" " reserved space: %d > %d" % (position, _RESERVED_HEADER_BYTES)) # write the rows out: for row in rows: reserved = _RESERVED_HEADER_BYTES # reserved space for first node row.spool.flush() row.spool.seek(0) # copy nodes to the finalised file. 
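# (Illustrative note, added commentary only: rows[0] is the root, so this
# loop copies the root layer first, then each internal layer, then the leaf
# layer, matching the order of the row_lengths option written into the
# header above.)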
# Special case the first node as it may be prefixed node = row.spool.read(_PAGE_SIZE) result.write(node[reserved:]) if len(node) == _PAGE_SIZE: result.write("\x00" * (reserved - position)) position = 0 # Only the root row actually has an offset copied_len = osutils.pumpfile(row.spool, result) if copied_len != (row.nodes - 1) * _PAGE_SIZE: if type(row) != _LeafBuilderRow: raise AssertionError("Incorrect amount of data copied" " expected: %d, got: %d" % ((row.nodes - 1) * _PAGE_SIZE, copied_len)) result.flush() size = result.tell() result.seek(0) return result, size def finish(self): """Finalise the index. :return: A file handle for a temporary file containing the nodes added to the index. """ return self._write_nodes(self.iter_all_entries())[0] def iter_all_entries(self): """Iterate over all keys within the index :return: An iterable of (index, key, value, reference_lists). There is no defined order for the result iteration - it will be in the most efficient order for the index (in this case dictionary hash order). """ if 'evil' in debug.debug_flags: trace.mutter_callsite(3, "iter_all_entries scales with size of history.") # Doing serial rather than ordered would be faster; but this shouldn't # be getting called routinely anyway. iterators = [self._iter_mem_nodes()] for backing in self._backing_indices: if backing is not None: iterators.append(backing.iter_all_entries()) if len(iterators) == 1: return iterators[0] return self._iter_smallest(iterators) def iter_entries(self, keys): """Iterate over keys within the index. :param keys: An iterable providing the keys to be retrieved. :return: An iterable of (index, key, value, reference_lists). There is no defined order for the result iteration - it will be in the most efficient order for the index (keys iteration order in this case). """ keys = set(keys) # Note: We don't use keys.intersection() here. If you read the C api, # set.intersection(other) special cases when other is a set and # will iterate the smaller of the two and lookup in the other. # It does *not* do this for any other type (even dict, unlike # some other set functions.) Since we expect keys is generally << # self._nodes, it is faster to iterate over it in a list # comprehension nodes = self._nodes local_keys = [key for key in keys if key in nodes] if self.reference_lists: for key in local_keys: node = nodes[key] yield self, key, node[1], node[0] else: for key in local_keys: node = nodes[key] yield self, key, node[1] # Find things that are in backing indices that have not been handled # yet. if not self._backing_indices: return # We won't find anything there either # Remove all of the keys that we found locally keys.difference_update(local_keys) for backing in self._backing_indices: if backing is None: continue if not keys: return for node in backing.iter_entries(keys): keys.remove(node[1]) yield (self,) + node[1:] def iter_entries_prefix(self, keys): """Iterate over keys within the index using prefix matching. Prefix matching is applied within the tuple of a key, not to within the bytestring of each key element. e.g. if you have the keys ('foo', 'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then only the former key is returned. :param keys: An iterable providing the key prefixes to be retrieved. Each key prefix takes the form of a tuple the length of a key, but with the last N elements 'None' rather than a regular bytestring. The first element cannot be 'None'. 
:return: An iterable as per iter_all_entries, but restricted to the keys with a matching prefix to those supplied. No additional keys will be returned, and every match that is in the index will be returned. """ # XXX: To much duplication with the GraphIndex class; consider finding # a good place to pull out the actual common logic. keys = set(keys) if not keys: return for backing in self._backing_indices: if backing is None: continue for node in backing.iter_entries_prefix(keys): yield (self,) + node[1:] if self._key_length == 1: for key in keys: # sanity check if key[0] is None: raise errors.BadIndexKey(key) if len(key) != self._key_length: raise errors.BadIndexKey(key) try: node = self._nodes[key] except KeyError: continue if self.reference_lists: yield self, key, node[1], node[0] else: yield self, key, node[1] return for key in keys: # sanity check if key[0] is None: raise errors.BadIndexKey(key) if len(key) != self._key_length: raise errors.BadIndexKey(key) # find what it refers to: key_dict = self._get_nodes_by_key() elements = list(key) # find the subdict to return try: while len(elements) and elements[0] is not None: key_dict = key_dict[elements[0]] elements.pop(0) except KeyError: # a non-existant lookup. continue if len(elements): dicts = [key_dict] while dicts: key_dict = dicts.pop(-1) # can't be empty or would not exist item, value = key_dict.iteritems().next() if type(value) == dict: # push keys dicts.extend(key_dict.itervalues()) else: # yield keys for value in key_dict.itervalues(): yield (self, ) + tuple(value) else: yield (self, ) + key_dict def _get_nodes_by_key(self): if self._nodes_by_key is None: nodes_by_key = {} if self.reference_lists: for key, (references, value) in self._nodes.iteritems(): key_dict = nodes_by_key for subkey in key[:-1]: key_dict = key_dict.setdefault(subkey, {}) key_dict[key[-1]] = key, value, references else: for key, (references, value) in self._nodes.iteritems(): key_dict = nodes_by_key for subkey in key[:-1]: key_dict = key_dict.setdefault(subkey, {}) key_dict[key[-1]] = key, value self._nodes_by_key = nodes_by_key return self._nodes_by_key def key_count(self): """Return an estimate of the number of keys in this index. For InMemoryGraphIndex the estimate is exact. """ return len(self._nodes) + sum(backing.key_count() for backing in self._backing_indices if backing is not None) def validate(self): """In memory index's have no known corruption at the moment.""" class _LeafNode(dict): """A leaf node for a serialised B+Tree index.""" __slots__ = ('min_key', 'max_key', '_keys') def __init__(self, bytes, key_length, ref_list_length): """Parse bytes to create a leaf node object.""" # splitlines mangles the \r delimiters.. don't use it. key_list = _btree_serializer._parse_leaf_lines(bytes, key_length, ref_list_length) if key_list: self.min_key = key_list[0][0] self.max_key = key_list[-1][0] else: self.min_key = self.max_key = None super(_LeafNode, self).__init__(key_list) self._keys = dict(self) def all_items(self): """Return a sorted list of (key, (value, refs)) items""" items = self.items() items.sort() return items def all_keys(self): """Return a sorted list of all keys.""" keys = self.keys() keys.sort() return keys class _InternalNode(object): """An internal node for a serialised B+Tree index.""" __slots__ = ('keys', 'offset') def __init__(self, bytes): """Parse bytes to create an internal node object.""" # splitlines mangles the \r delimiters.. don't use it. 
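# (Illustrative sketch of the serialised layout parsed below, following the
# grammar in BTreeBuilder's docstring: line 0 carries the 'type=internal'
# flag, line 1 the 'offset=N' value, and each following line is one
# \x00-separated key, e.g. a hypothetical two-element key
# 'file-id\x00rev-id'.)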
self.keys = self._parse_lines(bytes.split('\n')) def _parse_lines(self, lines): nodes = [] self.offset = int(lines[1][7:]) as_st = static_tuple.StaticTuple.from_sequence for line in lines[2:]: if line == '': break nodes.append(as_st(map(intern, line.split('\0'))).intern()) return nodes class BTreeGraphIndex(object): """Access to nodes via the standard GraphIndex interface for B+Tree's. Individual nodes are held in a LRU cache. This holds the root node in memory except when very large walks are done. """ def __init__(self, transport, name, size, unlimited_cache=False, offset=0): """Create a B+Tree index object on the index name. :param transport: The transport to read data for the index from. :param name: The file name of the index on transport. :param size: Optional size of the index in bytes. This allows compatibility with the GraphIndex API, as well as ensuring that the initial read (to read the root node header) can be done without over-reading even on empty indices, and on small indices allows single-IO to read the entire index. :param unlimited_cache: If set to True, then instead of using an LRUCache with size _NODE_CACHE_SIZE, we will use a dict and always cache all leaf nodes. :param offset: The start of the btree index data isn't byte 0 of the file. Instead it starts at some point later. """ self._transport = transport self._name = name self._size = size self._file = None self._recommended_pages = self._compute_recommended_pages() self._root_node = None self._base_offset = offset self._leaf_factory = _LeafNode # Default max size is 100,000 leave values self._leaf_value_cache = None # lru_cache.LRUCache(100*1000) if unlimited_cache: self._leaf_node_cache = {} self._internal_node_cache = {} else: self._leaf_node_cache = lru_cache.LRUCache(_NODE_CACHE_SIZE) # We use a FIFO here just to prevent possible blowout. However, a # 300k record btree has only 3k leaf nodes, and only 20 internal # nodes. A value of 100 scales to ~100*100*100 = 1M records. self._internal_node_cache = fifo_cache.FIFOCache(100) self._key_count = None self._row_lengths = None self._row_offsets = None # Start of each row, [-1] is the end def __eq__(self, other): """Equal when self and other were created with the same parameters.""" return ( type(self) == type(other) and self._transport == other._transport and self._name == other._name and self._size == other._size) def __ne__(self, other): return not self.__eq__(other) def _get_and_cache_nodes(self, nodes): """Read nodes and cache them in the lru. The nodes list supplied is sorted and then read from disk, each node being inserted it into the _node_cache. Note: Asking for more nodes than the _node_cache can contain will result in some of the results being immediately discarded, to prevent this an assertion is raised if more nodes are asked for than are cachable. :return: A dict of {node_pos: node} """ found = {} start_of_leaves = None for node_pos, node in self._read_nodes(sorted(nodes)): if node_pos == 0: # Special case self._root_node = node else: if start_of_leaves is None: start_of_leaves = self._row_offsets[-2] if node_pos < start_of_leaves: self._internal_node_cache[node_pos] = node else: self._leaf_node_cache[node_pos] = node found[node_pos] = node return found def _compute_recommended_pages(self): """Convert transport's recommended_page_size into btree pages. recommended_page_size is in bytes, we want to know how many _PAGE_SIZE pages fit in that length. 
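# --- Illustrative sketch (not part of bzrlib): the bytes-to-pages rounding
# that _compute_recommended_pages performs just below.  The 4096-byte page
# size and the 64kB read size are assumptions for this example (the module's
# fixed page size and the kind of read size an HTTP transport recommends).
import math

_EXAMPLE_PAGE_SIZE = 4096


def recommended_pages(recommended_read_bytes):
    # Round up so a partial page still counts as a whole page to request.
    return int(math.ceil(recommended_read_bytes / float(_EXAMPLE_PAGE_SIZE)))


assert recommended_pages(64 * 1024) == 16  # 64kB reads -> batch 16 pages
assert recommended_pages(4096) == 1        # exactly one page
assert recommended_pages(1) == 1           # any non-empty read needs a page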
""" recommended_read = self._transport.recommended_page_size() recommended_pages = int(math.ceil(recommended_read / float(_PAGE_SIZE))) return recommended_pages def _compute_total_pages_in_index(self): """How many pages are in the index. If we have read the header we will use the value stored there. Otherwise it will be computed based on the length of the index. """ if self._size is None: raise AssertionError('_compute_total_pages_in_index should not be' ' called when self._size is None') if self._root_node is not None: # This is the number of pages as defined by the header return self._row_offsets[-1] # This is the number of pages as defined by the size of the index. They # should be indentical. total_pages = int(math.ceil(self._size / float(_PAGE_SIZE))) return total_pages def _expand_offsets(self, offsets): """Find extra pages to download. The idea is that we always want to make big-enough requests (like 64kB for http), so that we don't waste round trips. So given the entries that we already have cached and the new pages being downloaded figure out what other pages we might want to read. See also doc/developers/btree_index_prefetch.txt for more details. :param offsets: The offsets to be read :return: A list of offsets to download """ if 'index' in debug.debug_flags: trace.mutter('expanding: %s\toffsets: %s', self._name, offsets) if len(offsets) >= self._recommended_pages: # Don't add more, we are already requesting more than enough if 'index' in debug.debug_flags: trace.mutter(' not expanding large request (%s >= %s)', len(offsets), self._recommended_pages) return offsets if self._size is None: # Don't try anything, because we don't know where the file ends if 'index' in debug.debug_flags: trace.mutter(' not expanding without knowing index size') return offsets total_pages = self._compute_total_pages_in_index() cached_offsets = self._get_offsets_to_cached_pages() # If reading recommended_pages would read the rest of the index, just # do so. if total_pages - len(cached_offsets) <= self._recommended_pages: # Read whatever is left if cached_offsets: expanded = [x for x in xrange(total_pages) if x not in cached_offsets] else: expanded = range(total_pages) if 'index' in debug.debug_flags: trace.mutter(' reading all unread pages: %s', expanded) return expanded if self._root_node is None: # ATM on the first read of the root node of a large index, we don't # bother pre-reading any other pages. This is because the # likelyhood of actually reading interesting pages is very low. # See doc/developers/btree_index_prefetch.txt for a discussion, and # a possible implementation when we are guessing that the second # layer index is small final_offsets = offsets else: tree_depth = len(self._row_lengths) if len(cached_offsets) < tree_depth and len(offsets) == 1: # We haven't read enough to justify expansion # If we are only going to read the root node, and 1 leaf node, # then it isn't worth expanding our request. Once we've read at # least 2 nodes, then we are probably doing a search, and we # start expanding our requests. if 'index' in debug.debug_flags: trace.mutter(' not expanding on first reads') return offsets final_offsets = self._expand_to_neighbors(offsets, cached_offsets, total_pages) final_offsets = sorted(final_offsets) if 'index' in debug.debug_flags: trace.mutter('expanded: %s', final_offsets) return final_offsets def _expand_to_neighbors(self, offsets, cached_offsets, total_pages): """Expand requests to neighbors until we have enough pages. 
This is called from _expand_offsets after policy has determined that we want to expand. We only want to expand requests within a given layer. We cheat a little bit and assume all requests will be in the same layer. This is true given the current design, but if it changes this algorithm may perform oddly. :param offsets: requested offsets :param cached_offsets: offsets for pages we currently have cached :return: A set() of offsets after expansion """ final_offsets = set(offsets) first = end = None new_tips = set(final_offsets) while len(final_offsets) < self._recommended_pages and new_tips: next_tips = set() for pos in new_tips: if first is None: first, end = self._find_layer_first_and_end(pos) previous = pos - 1 if (previous > 0 and previous not in cached_offsets and previous not in final_offsets and previous >= first): next_tips.add(previous) after = pos + 1 if (after < total_pages and after not in cached_offsets and after not in final_offsets and after < end): next_tips.add(after) # This would keep us from going bigger than # recommended_pages by only expanding the first offsets. # However, if we are making a 'wide' request, it is # reasonable to expand all points equally. # if len(final_offsets) > recommended_pages: # break final_offsets.update(next_tips) new_tips = next_tips return final_offsets def clear_cache(self): """Clear out any cached/memoized values. This can be called at any time, but generally it is used when we have extracted some information, but don't expect to be requesting any more from this index. """ # Note that we don't touch self._root_node or self._internal_node_cache # We don't expect either of those to be big, and it can save # round-trips in the future. We may re-evaluate this if InternalNode # memory starts to be an issue. self._leaf_node_cache.clear() def external_references(self, ref_list_num): if self._root_node is None: self._get_root_node() if ref_list_num + 1 > self.node_ref_lists: raise ValueError('No ref list %d, index has %d ref lists' % (ref_list_num, self.node_ref_lists)) keys = set() refs = set() for node in self.iter_all_entries(): keys.add(node[1]) refs.update(node[3][ref_list_num]) return refs - keys def _find_layer_first_and_end(self, offset): """Find the start/stop nodes for the layer corresponding to offset. :return: (first, end) first is the first node in this layer end is the first node of the next layer """ first = end = 0 for roffset in self._row_offsets: first = end end = roffset if offset < roffset: break return first, end def _get_offsets_to_cached_pages(self): """Determine what nodes we already have cached.""" cached_offsets = set(self._internal_node_cache.keys()) cached_offsets.update(self._leaf_node_cache.keys()) if self._root_node is not None: cached_offsets.add(0) return cached_offsets def _get_root_node(self): if self._root_node is None: # We may not have a root node yet self._get_internal_nodes([0]) return self._root_node def _get_nodes(self, cache, node_indexes): found = {} needed = [] for idx in node_indexes: if idx == 0 and self._root_node is not None: found[0] = self._root_node continue try: found[idx] = cache[idx] except KeyError: needed.append(idx) if not needed: return found needed = self._expand_offsets(needed) found.update(self._get_and_cache_nodes(needed)) return found def _get_internal_nodes(self, node_indexes): """Get a node, from cache or disk. After getting it, the node will be cached. 
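# --- Illustrative sketch (not part of bzrlib): how _find_layer_first_and_end
# above maps a page offset to the [first, end) range of its B+Tree layer.
# The row_offsets value is hypothetical: a tree with 1 root page, 4 internal
# pages and 60 leaf pages has row_lengths [1, 4, 60], giving row_offsets
# [0, 1, 5, 65] (the final entry is one past the last page).

def find_layer_first_and_end(row_offsets, offset):
    # Same loop shape as the method above: walk the cumulative offsets until
    # one passes the requested page, remembering the previous boundary.
    first = end = 0
    for roffset in row_offsets:
        first = end
        end = roffset
        if offset < roffset:
            break
    return first, end


row_offsets = [0, 1, 5, 65]
assert find_layer_first_and_end(row_offsets, 0) == (0, 1)    # root layer
assert find_layer_first_and_end(row_offsets, 3) == (1, 5)    # internal layer
assert find_layer_first_and_end(row_offsets, 10) == (5, 65)  # leaf layer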
""" return self._get_nodes(self._internal_node_cache, node_indexes) def _cache_leaf_values(self, nodes): """Cache directly from key => value, skipping the btree.""" if self._leaf_value_cache is not None: for node in nodes.itervalues(): for key, value in node.all_items(): if key in self._leaf_value_cache: # Don't add the rest of the keys, we've seen this node # before. break self._leaf_value_cache[key] = value def _get_leaf_nodes(self, node_indexes): """Get a bunch of nodes, from cache or disk.""" found = self._get_nodes(self._leaf_node_cache, node_indexes) self._cache_leaf_values(found) return found def iter_all_entries(self): """Iterate over all keys within the index. :return: An iterable of (index, key, value) or (index, key, value, reference_lists). The former tuple is used when there are no reference lists in the index, making the API compatible with simple key:value index types. There is no defined order for the result iteration - it will be in the most efficient order for the index. """ if 'evil' in debug.debug_flags: trace.mutter_callsite(3, "iter_all_entries scales with size of history.") if not self.key_count(): return if self._row_offsets[-1] == 1: # There is only the root node, and we read that via key_count() if self.node_ref_lists: for key, (value, refs) in self._root_node.all_items(): yield (self, key, value, refs) else: for key, (value, refs) in self._root_node.all_items(): yield (self, key, value) return start_of_leaves = self._row_offsets[-2] end_of_leaves = self._row_offsets[-1] needed_offsets = range(start_of_leaves, end_of_leaves) if needed_offsets == [0]: # Special case when we only have a root node, as we have already # read everything nodes = [(0, self._root_node)] else: nodes = self._read_nodes(needed_offsets) # We iterate strictly in-order so that we can use this function # for spilling index builds to disk. if self.node_ref_lists: for _, node in nodes: for key, (value, refs) in node.all_items(): yield (self, key, value, refs) else: for _, node in nodes: for key, (value, refs) in node.all_items(): yield (self, key, value) @staticmethod def _multi_bisect_right(in_keys, fixed_keys): """Find the positions where each 'in_key' would fit in fixed_keys. This is equivalent to doing "bisect_right" on each in_key into fixed_keys :param in_keys: A sorted list of keys to match with fixed_keys :param fixed_keys: A sorted list of keys to match against :return: A list of (integer position, [key list]) tuples. """ if not in_keys: return [] if not fixed_keys: # no pointers in the fixed_keys list, which means everything must # fall to the left. return [(0, in_keys)] # TODO: Iterating both lists will generally take M + N steps # Bisecting each key will generally take M * log2 N steps. # If we had an efficient way to compare, we could pick the method # based on which has the fewer number of steps. # There is also the argument that bisect_right is a compiled # function, so there is even more to be gained. 
# iter_steps = len(in_keys) + len(fixed_keys) # bisect_steps = len(in_keys) * math.log(len(fixed_keys), 2) if len(in_keys) == 1: # Bisect will always be faster for M = 1 return [(bisect.bisect_right(fixed_keys, in_keys[0]), in_keys)] # elif bisect_steps < iter_steps: # offsets = {} # for key in in_keys: # offsets.setdefault(bisect_right(fixed_keys, key), # []).append(key) # return [(o, offsets[o]) for o in sorted(offsets)] in_keys_iter = iter(in_keys) fixed_keys_iter = enumerate(fixed_keys) cur_in_key = in_keys_iter.next() cur_fixed_offset, cur_fixed_key = fixed_keys_iter.next() class InputDone(Exception): pass class FixedDone(Exception): pass output = [] cur_out = [] # TODO: Another possibility is that rather than iterating on each side, # we could use a combination of bisecting and iterating. For # example, while cur_in_key < fixed_key, bisect to find its # point, then iterate all matching keys, then bisect (restricted # to only the remainder) for the next one, etc. try: while True: if cur_in_key < cur_fixed_key: cur_keys = [] cur_out = (cur_fixed_offset, cur_keys) output.append(cur_out) while cur_in_key < cur_fixed_key: cur_keys.append(cur_in_key) try: cur_in_key = in_keys_iter.next() except StopIteration: raise InputDone # At this point cur_in_key must be >= cur_fixed_key # step the cur_fixed_key until we pass the cur key, or walk off # the end while cur_in_key >= cur_fixed_key: try: cur_fixed_offset, cur_fixed_key = fixed_keys_iter.next() except StopIteration: raise FixedDone except InputDone: # We consumed all of the input, nothing more to do pass except FixedDone: # There was some input left, but we consumed all of fixed, so we # have to add one more for the tail cur_keys = [cur_in_key] cur_keys.extend(in_keys_iter) cur_out = (len(fixed_keys), cur_keys) output.append(cur_out) return output def _walk_through_internal_nodes(self, keys): """Take the given set of keys, and find the corresponding LeafNodes. :param keys: An unsorted iterable of keys to search for :return: (nodes, index_and_keys) nodes is a dict mapping {index: LeafNode} keys_at_index is a list of tuples of [(index, [keys for Leaf])] """ # 6 seconds spent in miss_torture using the sorted() line. # Even with out of order disk IO it seems faster not to sort it when # large queries are being made. keys_at_index = [(0, sorted(keys))] for row_pos, next_row_start in enumerate(self._row_offsets[1:-1]): node_indexes = [idx for idx, s_keys in keys_at_index] nodes = self._get_internal_nodes(node_indexes) next_nodes_and_keys = [] for node_index, sub_keys in keys_at_index: node = nodes[node_index] positions = self._multi_bisect_right(sub_keys, node.keys) node_offset = next_row_start + node.offset next_nodes_and_keys.extend([(node_offset + pos, s_keys) for pos, s_keys in positions]) keys_at_index = next_nodes_and_keys # We should now be at the _LeafNodes node_indexes = [idx for idx, s_keys in keys_at_index] # TODO: We may *not* want to always read all the nodes in one # big go. Consider setting a max size on this. nodes = self._get_leaf_nodes(node_indexes) return nodes, keys_at_index def iter_entries(self, keys): """Iterate over keys within the index. :param keys: An iterable providing the keys to be retrieved. :return: An iterable as per iter_all_entries, but restricted to the keys supplied. No additional keys will be returned, and every key supplied that is in the index will be returned. """ # 6 seconds spent in miss_torture using the sorted() line. 
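# --- Illustrative sketch (not part of bzrlib): _multi_bisect_right above is
# equivalent to bisect_right-ing each in_key into fixed_keys and grouping the
# keys by the position they land on, as the commented-out alternative notes.
# The separator and search keys below are hypothetical.
import bisect


def multi_bisect_right_simple(in_keys, fixed_keys):
    """Group sorted in_keys by their bisect_right position in fixed_keys."""
    offsets = {}
    for key in in_keys:
        offsets.setdefault(bisect.bisect_right(fixed_keys, key), []).append(key)
    return [(pos, offsets[pos]) for pos in sorted(offsets)]


fixed_keys = [('d',), ('j',), ('p',)]               # an internal node's separators
in_keys = [('a',), ('e',), ('f',), ('k',), ('z',)]  # sorted keys being searched
# Position 0 collects keys before ('d',), position 1 keys in [('d',), ('j',)),
# and so on; _walk_through_internal_nodes then adds each position to the
# node's child offset to pick the page to descend into.
assert multi_bisect_right_simple(in_keys, fixed_keys) == [
    (0, [('a',)]),
    (1, [('e',), ('f',)]),
    (2, [('k',)]),
    (3, [('z',)]),
]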
# Even with out of order disk IO it seems faster not to sort it when # large queries are being made. # However, now that we are doing multi-way bisecting, we need the keys # in sorted order anyway. We could change the multi-way code to not # require sorted order. (For example, it bisects for the first node, # does an in-order search until a key comes before the current point, # which it then bisects for, etc.) keys = frozenset(keys) if not keys: return if not self.key_count(): return needed_keys = [] if self._leaf_value_cache is None: needed_keys = keys else: for key in keys: value = self._leaf_value_cache.get(key, None) if value is not None: # This key is known not to be here, skip it value, refs = value if self.node_ref_lists: yield (self, key, value, refs) else: yield (self, key, value) else: needed_keys.append(key) last_key = None needed_keys = keys if not needed_keys: return nodes, nodes_and_keys = self._walk_through_internal_nodes(needed_keys) for node_index, sub_keys in nodes_and_keys: if not sub_keys: continue node = nodes[node_index] for next_sub_key in sub_keys: if next_sub_key in node: value, refs = node[next_sub_key] if self.node_ref_lists: yield (self, next_sub_key, value, refs) else: yield (self, next_sub_key, value) def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys): """Find the parent_map information for the set of keys. This populates the parent_map dict and missing_keys set based on the queried keys. It also can fill out an arbitrary number of parents that it finds while searching for the supplied keys. It is unlikely that you want to call this directly. See "CombinedGraphIndex.find_ancestry()" for a more appropriate API. :param keys: A keys whose ancestry we want to return Every key will either end up in 'parent_map' or 'missing_keys'. :param ref_list_num: This index in the ref_lists is the parents we care about. :param parent_map: {key: parent_keys} for keys that are present in this index. This may contain more entries than were in 'keys', that are reachable ancestors of the keys requested. :param missing_keys: keys which are known to be missing in this index. This may include parents that were not directly requested, but we were able to determine that they are not present in this index. :return: search_keys parents that were found but not queried to know if they are missing or present. Callers can re-query this index for those keys, and they will be placed into parent_map or missing_keys """ if not self.key_count(): # We use key_count() to trigger reading the root node and # determining info about this BTreeGraphIndex # If we don't have any keys, then everything is missing missing_keys.update(keys) return set() if ref_list_num >= self.node_ref_lists: raise ValueError('No ref list %d, index has %d ref lists' % (ref_list_num, self.node_ref_lists)) # The main trick we are trying to accomplish is that when we find a # key listing its parents, we expect that the parent key is also likely # to sit on the same page. Allowing us to expand parents quickly # without suffering the full stack of bisecting, etc. nodes, nodes_and_keys = self._walk_through_internal_nodes(keys) # These are parent keys which could not be immediately resolved on the # page where the child was present. Note that we may already be # searching for that key, and it may actually be present [or known # missing] on one of the other pages we are reading. # TODO: # We could try searching for them in the immediate previous or next # page. 
If they occur "later" we could put them in a pending lookup # set, and then for each node we read thereafter we could check to # see if they are present. # However, we don't know the impact of keeping this list of things # that I'm going to search for every node I come across from here on # out. # It doesn't handle the case when the parent key is missing on a # page that we *don't* read. So we already have to handle being # re-entrant for that. # Since most keys contain a date string, they are more likely to be # found earlier in the file than later, but we would know that right # away (key < min_key), and wouldn't keep searching it on every other # page that we read. # Mostly, it is an idea, one which should be benchmarked. parents_not_on_page = set() for node_index, sub_keys in nodes_and_keys: if not sub_keys: continue # sub_keys is all of the keys we are looking for that should exist # on this page, if they aren't here, then they won't be found node = nodes[node_index] parents_to_check = set() for next_sub_key in sub_keys: if next_sub_key not in node: # This one is just not present in the index at all missing_keys.add(next_sub_key) else: value, refs = node[next_sub_key] parent_keys = refs[ref_list_num] parent_map[next_sub_key] = parent_keys parents_to_check.update(parent_keys) # Don't look for things we've already found parents_to_check = parents_to_check.difference(parent_map) # this can be used to test the benefit of having the check loop # inlined. # parents_not_on_page.update(parents_to_check) # continue while parents_to_check: next_parents_to_check = set() for key in parents_to_check: if key in node: value, refs = node[key] parent_keys = refs[ref_list_num] parent_map[key] = parent_keys next_parents_to_check.update(parent_keys) else: # This parent either is genuinely missing, or should be # found on another page. Perf test whether it is better # to check if this node should fit on this page or not. # in the 'everything-in-one-pack' scenario, this *not* # doing the check is 237ms vs 243ms. # So slightly better, but I assume the standard 'lots # of packs' is going to show a reasonable improvement # from the check, because it avoids 'going around # again' for everything that is in another index # parents_not_on_page.add(key) # Missing for some reason if key < node.min_key: # in the case of bzr.dev, 3.4k/5.3k misses are # 'earlier' misses (65%) parents_not_on_page.add(key) elif key > node.max_key: # This parent key would be present on a different # LeafNode parents_not_on_page.add(key) else: # assert key != node.min_key and key != node.max_key # If it was going to be present, it would be on # *this* page, so mark it missing. missing_keys.add(key) parents_to_check = next_parents_to_check.difference(parent_map) # Might want to do another .difference() from missing_keys # parents_not_on_page could have been found on a different page, or be # known to be missing. So cull out everything that has already been # found. search_keys = parents_not_on_page.difference( parent_map).difference(missing_keys) return search_keys def iter_entries_prefix(self, keys): """Iterate over keys within the index using prefix matching. Prefix matching is applied within the tuple of a key, not to within the bytestring of each key element. e.g. if you have the keys ('foo', 'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then only the former key is returned. 
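# --- Illustrative sketch (not part of bzrlib): the re-entrant contract of
# _find_ancestors above.  Each call fills parent_map / missing_keys for what
# it could resolve and returns the parent keys it could not, which the caller
# feeds back in until nothing is left -- the loop CombinedGraphIndex's
# find_ancestry drives against the real method.  ToyIndex is a stand-in, not
# bzrlib API, and the revision graph below is fabricated.

class ToyIndex(object):
    """Resolves the keys it is asked about and reports unresolved parents."""

    def __init__(self, graph):
        self._graph = graph  # key -> tuple of parent keys

    def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
        pending = set()
        for key in keys:
            if key in self._graph:
                parents = self._graph[key]
                parent_map[key] = parents
                pending.update(parents)
            else:
                missing_keys.add(key)
        # Hand back parents not yet resolved; the caller re-queries them.
        return pending.difference(parent_map).difference(missing_keys)


graph = {('C',): (('B',),), ('B',): (('A',),), ('A',): ()}
index = ToyIndex(graph)
parent_map, missing_keys = {}, set()
search_keys = set([('C',), ('ghost',)])
while search_keys:
    search_keys = index._find_ancestors(search_keys, 0, parent_map, missing_keys)
assert parent_map == graph
assert missing_keys == set([('ghost',)])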
WARNING: Note that this method currently causes a full index parse unconditionally (which is reasonably appropriate as it is a means for thunking many small indices into one larger one and still supplies iter_all_entries at the thunk layer). :param keys: An iterable providing the key prefixes to be retrieved. Each key prefix takes the form of a tuple the length of a key, but with the last N elements 'None' rather than a regular bytestring. The first element cannot be 'None'. :return: An iterable as per iter_all_entries, but restricted to the keys with a matching prefix to those supplied. No additional keys will be returned, and every match that is in the index will be returned. """ keys = sorted(set(keys)) if not keys: return # Load if needed to check key lengths if self._key_count is None: self._get_root_node() # TODO: only access nodes that can satisfy the prefixes we are looking # for. For now, to meet API usage (as this function is not used by # current bzrlib) just suck the entire index and iterate in memory. nodes = {} if self.node_ref_lists: if self._key_length == 1: for _1, key, value, refs in self.iter_all_entries(): nodes[key] = value, refs else: nodes_by_key = {} for _1, key, value, refs in self.iter_all_entries(): key_value = key, value, refs # For a key of (foo, bar, baz) create # _nodes_by_key[foo][bar][baz] = key_value key_dict = nodes_by_key for subkey in key[:-1]: key_dict = key_dict.setdefault(subkey, {}) key_dict[key[-1]] = key_value else: if self._key_length == 1: for _1, key, value in self.iter_all_entries(): nodes[key] = value else: nodes_by_key = {} for _1, key, value in self.iter_all_entries(): key_value = key, value # For a key of (foo, bar, baz) create # _nodes_by_key[foo][bar][baz] = key_value key_dict = nodes_by_key for subkey in key[:-1]: key_dict = key_dict.setdefault(subkey, {}) key_dict[key[-1]] = key_value if self._key_length == 1: for key in keys: # sanity check if key[0] is None: raise errors.BadIndexKey(key) if len(key) != self._key_length: raise errors.BadIndexKey(key) try: if self.node_ref_lists: value, node_refs = nodes[key] yield self, key, value, node_refs else: yield self, key, nodes[key] except KeyError: pass return for key in keys: # sanity check if key[0] is None: raise errors.BadIndexKey(key) if len(key) != self._key_length: raise errors.BadIndexKey(key) # find what it refers to: key_dict = nodes_by_key elements = list(key) # find the subdict whose contents should be returned. try: while len(elements) and elements[0] is not None: key_dict = key_dict[elements[0]] elements.pop(0) except KeyError: # a non-existant lookup. continue if len(elements): dicts = [key_dict] while dicts: key_dict = dicts.pop(-1) # can't be empty or would not exist item, value = key_dict.iteritems().next() if type(value) == dict: # push keys dicts.extend(key_dict.itervalues()) else: # yield keys for value in key_dict.itervalues(): # each value is the key:value:node refs tuple # ready to yield. yield (self, ) + value else: # the last thing looked up was a terminal element yield (self, ) + key_dict def key_count(self): """Return an estimate of the number of keys in this index. For BTreeGraphIndex the estimate is exact as it is contained in the header. 
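# --- Illustrative sketch (not part of bzrlib): how the 'row_lengths' header
# value becomes the cumulative _row_offsets used throughout this class; the
# same summing is done by _compute_row_offsets just below.  The row_lengths
# values here are hypothetical.

def compute_row_offsets(row_lengths):
    offsets = []
    row_offset = 0
    for row in row_lengths:
        offsets.append(row_offset)
        row_offset += row
    offsets.append(row_offset)  # final entry is one past the last page
    return offsets


# 1 root page, 4 internal pages, 60 leaf pages:
assert compute_row_offsets([1, 4, 60]) == [0, 1, 5, 65]
# A tiny index whose root page is also its only (leaf) page:
assert compute_row_offsets([1]) == [0, 1]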
""" if self._key_count is None: self._get_root_node() return self._key_count def _compute_row_offsets(self): """Fill out the _row_offsets attribute based on _row_lengths.""" offsets = [] row_offset = 0 for row in self._row_lengths: offsets.append(row_offset) row_offset += row offsets.append(row_offset) self._row_offsets = offsets def _parse_header_from_bytes(self, bytes): """Parse the header from a region of bytes. :param bytes: The data to parse. :return: An offset, data tuple such as readv yields, for the unparsed data. (which may be of length 0). """ signature = bytes[0:len(self._signature())] if not signature == self._signature(): raise errors.BadIndexFormatSignature(self._name, BTreeGraphIndex) lines = bytes[len(self._signature()):].splitlines() options_line = lines[0] if not options_line.startswith(_OPTION_NODE_REFS): raise errors.BadIndexOptions(self) try: self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):]) except ValueError: raise errors.BadIndexOptions(self) options_line = lines[1] if not options_line.startswith(_OPTION_KEY_ELEMENTS): raise errors.BadIndexOptions(self) try: self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):]) except ValueError: raise errors.BadIndexOptions(self) options_line = lines[2] if not options_line.startswith(_OPTION_LEN): raise errors.BadIndexOptions(self) try: self._key_count = int(options_line[len(_OPTION_LEN):]) except ValueError: raise errors.BadIndexOptions(self) options_line = lines[3] if not options_line.startswith(_OPTION_ROW_LENGTHS): raise errors.BadIndexOptions(self) try: self._row_lengths = map(int, [length for length in options_line[len(_OPTION_ROW_LENGTHS):].split(',') if len(length)]) except ValueError: raise errors.BadIndexOptions(self) self._compute_row_offsets() # calculate the bytes we have processed header_end = (len(signature) + sum(map(len, lines[0:4])) + 4) return header_end, bytes[header_end:] def _read_nodes(self, nodes): """Read some nodes from disk into the LRU cache. This performs a readv to get the node data into memory, and parses each node, then yields it to the caller. The nodes are requested in the supplied order. If possible doing sort() on the list before requesting a read may improve performance. :param nodes: The nodes to read. 0 - first node, 1 - second node etc. :return: None """ # may be the byte string of the whole file bytes = None # list of (offset, length) regions of the file that should, evenually # be read in to data_ranges, either from 'bytes' or from the transport ranges = [] base_offset = self._base_offset for index in nodes: offset = (index * _PAGE_SIZE) size = _PAGE_SIZE if index == 0: # Root node - special case if self._size: size = min(_PAGE_SIZE, self._size) else: # The only case where we don't know the size, is for very # small indexes. 
So we read the whole thing bytes = self._transport.get_bytes(self._name) num_bytes = len(bytes) self._size = num_bytes - base_offset # the whole thing should be parsed out of 'bytes' ranges = [(start, min(_PAGE_SIZE, num_bytes - start)) for start in xrange(base_offset, num_bytes, _PAGE_SIZE)] break else: if offset > self._size: raise AssertionError('tried to read past the end' ' of the file %s > %s' % (offset, self._size)) size = min(size, self._size - offset) ranges.append((base_offset + offset, size)) if not ranges: return elif bytes is not None: # already have the whole file data_ranges = [(start, bytes[start:start+size]) for start, size in ranges] elif self._file is None: data_ranges = self._transport.readv(self._name, ranges) else: data_ranges = [] for offset, size in ranges: self._file.seek(offset) data_ranges.append((offset, self._file.read(size))) for offset, data in data_ranges: offset -= base_offset if offset == 0: # extract the header offset, data = self._parse_header_from_bytes(data) if len(data) == 0: continue bytes = zlib.decompress(data) if bytes.startswith(_LEAF_FLAG): node = self._leaf_factory(bytes, self._key_length, self.node_ref_lists) elif bytes.startswith(_INTERNAL_FLAG): node = _InternalNode(bytes) else: raise AssertionError("Unknown node type for %r" % bytes) yield offset / _PAGE_SIZE, node def _signature(self): """The file signature for this index type.""" return _BTSIGNATURE def validate(self): """Validate that everything in the index can be accessed.""" # just read and parse every node. self._get_root_node() if len(self._row_lengths) > 1: start_node = self._row_offsets[1] else: # We shouldn't be reading anything anyway start_node = 1 node_end = self._row_offsets[-1] for node in self._read_nodes(range(start_node, node_end)): pass _gcchk_factory = _LeafNode try: from bzrlib import _btree_serializer_pyx as _btree_serializer _gcchk_factory = _btree_serializer._parse_into_chk except ImportError, e: osutils.failed_to_load_extension(e) from bzrlib import _btree_serializer_py as _btree_serializer bzr-2.7.0/bzrlib/bugtracker.py0000644000000000000000000002655611673360271014456 0ustar 00000000000000# Copyright (C) 2007-2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import from bzrlib import registry from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ from bzrlib import errors, urlutils """) """Provides a shorthand for referring to bugs on a variety of bug trackers. 'commit --fixes' stores references to bugs as a -> mapping in the properties for that revision. However, it's inconvenient to type out full URLs for bugs on the command line, particularly given that many users will be using only a single bug tracker per branch. Thus, this module provides a registry of types of bug tracker (e.g. Launchpad, Trac). Given an abbreviated name (e.g. 
'lp', 'twisted') and a branch with configuration information, these tracker types can return an instance capable of converting bug IDs into URLs. """ _bugs_help = \ """When making a commit, metadata about bugs fixed by that change can be recorded by using the ``--fixes`` option. For each bug marked as fixed, an entry is included in the 'bugs' revision property stating ' '. (The only ``status`` value currently supported is ``fixed.``) The ``--fixes`` option allows you to specify a bug tracker and a bug identifier rather than a full URL. This looks like:: bzr commit --fixes : or:: bzr commit --fixes where "" is an identifier for the bug tracker, and "" is the identifier for that bug within the bugtracker, usually the bug number. If "" is not specified the ``bugtracker`` set in the branch or global configuration is used. Bazaar knows about a few bug trackers that have many users. If you use one of these bug trackers then there is no setup required to use this feature, you just need to know the tracker identifier to use. These are the bugtrackers that are built in: ============================ ============ ============ URL Abbreviation Example ============================ ============ ============ https://bugs.launchpad.net/ lp lp:12345 http://bugs.debian.org/ deb deb:12345 http://bugzilla.gnome.org/ gnome gnome:12345 ============================ ============ ============ For the bug trackers not listed above configuration is required. Support for generating the URLs for any project using Bugzilla or Trac is built in, along with a template mechanism for other bugtrackers with simple URL schemes. If your bug tracker can't be described by one of the schemes described below then you can write a plugin to support it. If you use Bugzilla or Trac, then you only need to set a configuration variable which contains the base URL of the bug tracker. These options can go into ``bazaar.conf``, ``branch.conf`` or into a branch-specific configuration section in ``locations.conf``. You can set up these values for each of the projects you work on. Note: As you provide a short name for each tracker, you can specify one or more bugs in one or more trackers at commit time if you wish. Launchpad --------- Use ``bzr commit --fixes lp:2`` to record that this commit fixes bug 2. bugzilla__url ---------------------- If present, the location of the Bugzilla bug tracker referred to by . This option can then be used together with ``bzr commit --fixes`` to mark bugs in that tracker as being fixed by that commit. For example:: bugzilla_squid_url = http://bugs.squid-cache.org would allow ``bzr commit --fixes squid:1234`` to mark Squid's bug 1234 as fixed. trac__url ------------------ If present, the location of the Trac instance referred to by . This option can then be used together with ``bzr commit --fixes`` to mark bugs in that tracker as being fixed by that commit. For example:: trac_twisted_url = http://www.twistedmatrix.com/trac would allow ``bzr commit --fixes twisted:1234`` to mark Twisted's bug 1234 as fixed. bugtracker__url ------------------------ If present, the location of a generic bug tracker instance referred to by . The location must contain an ``{id}`` placeholder, which will be replaced by a specific bug ID. This option can then be used together with ``bzr commit --fixes`` to mark bugs in that tracker as being fixed by that commit. 
For example:: bugtracker_python_url = http://bugs.python.org/issue{id} would allow ``bzr commit --fixes python:1234`` to mark bug 1234 in Python's Roundup bug tracker as fixed, or:: bugtracker_cpan_url = http://rt.cpan.org/Public/Bug/Display.html?id={id} would allow ``bzr commit --fixes cpan:1234`` to mark bug 1234 in CPAN's RT bug tracker as fixed, or:: bugtracker_hudson_url = http://issues.hudson-ci.org/browse/{id} would allow ``bzr commit --fixes hudson:HUDSON-1234`` to mark bug HUDSON-1234 in Hudson's JIRA bug tracker as fixed. """ def get_bug_url(abbreviated_bugtracker_name, branch, bug_id): """Return a URL pointing to the canonical web page of the bug identified by 'bug_id'. """ tracker = tracker_registry.get_tracker(abbreviated_bugtracker_name, branch) return tracker.get_bug_url(bug_id) class TrackerRegistry(registry.Registry): """Registry of bug tracker types.""" def get_tracker(self, abbreviated_bugtracker_name, branch): """Return the first registered tracker that understands 'abbreviated_bugtracker_name'. If no such tracker is found, raise KeyError. """ for tracker_name in self.keys(): tracker_type = self.get(tracker_name) tracker = tracker_type.get(abbreviated_bugtracker_name, branch) if tracker is not None: return tracker raise errors.UnknownBugTrackerAbbreviation(abbreviated_bugtracker_name, branch) def help_topic(self, topic): return _bugs_help tracker_registry = TrackerRegistry() """Registry of bug trackers.""" class BugTracker(object): """Base class for bug trackers.""" def check_bug_id(self, bug_id): """Check that the bug_id is valid. The base implementation assumes that all bug_ids are valid. """ def get_bug_url(self, bug_id): """Return the URL for bug_id. Raise an error if bug ID is malformed.""" self.check_bug_id(bug_id) return self._get_bug_url(bug_id) def _get_bug_url(self, bug_id): """Given a validated bug_id, return the bug's web page's URL.""" class IntegerBugTracker(BugTracker): """A bug tracker that only allows integer bug IDs.""" def check_bug_id(self, bug_id): try: int(bug_id) except ValueError: raise errors.MalformedBugIdentifier(bug_id, "Must be an integer") class UniqueIntegerBugTracker(IntegerBugTracker): """A style of bug tracker that exists in one place only, such as Launchpad. If you have one of these trackers then register an instance passing in an abbreviated name for the bug tracker and a base URL. The bug ids are appended directly to the URL. """ def __init__(self, abbreviated_bugtracker_name, base_url): self.abbreviation = abbreviated_bugtracker_name self.base_url = base_url def get(self, abbreviated_bugtracker_name, branch): """Returns the tracker if the abbreviation matches. Returns None otherwise.""" if abbreviated_bugtracker_name != self.abbreviation: return None return self def _get_bug_url(self, bug_id): """Return the URL for bug_id.""" return self.base_url + bug_id tracker_registry.register( 'launchpad', UniqueIntegerBugTracker('lp', 'https://launchpad.net/bugs/')) tracker_registry.register( 'debian', UniqueIntegerBugTracker('deb', 'http://bugs.debian.org/')) tracker_registry.register('gnome', UniqueIntegerBugTracker('gnome', 'http://bugzilla.gnome.org/show_bug.cgi?id=')) class URLParametrizedBugTracker(BugTracker): """A type of bug tracker that can be found on a variety of different sites, and thus needs to have the base URL configured. Looks for a config setting in the form '__url'. `type_name` is the name of the type of tracker and `abbreviation` is a short name for the particular instance. 
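# --- Illustrative sketch (not part of bzrlib): how the trackers registered
# above turn '<tracker>:<id>' from 'bzr commit --fixes' into a URL.
# UniqueIntegerBugTracker appends the integer id to its base URL, while the
# generic tracker (GenericBugTracker, below) substitutes the id into an
# '{id}' template taken from configuration.  The helper names here are
# stand-ins, not bzrlib API.

def unique_tracker_url(base_url, bug_id):
    # Mirrors UniqueIntegerBugTracker: the id must be an integer and is
    # appended directly to the base URL.
    int(bug_id)  # raises ValueError for malformed ids such as 'abc'
    return base_url + bug_id


def templated_tracker_url(template, bug_id):
    # Mirrors GenericBugTracker: the configured URL must contain '{id}'.
    if '{id}' not in template:
        raise ValueError('template has no {id} placeholder: %r' % (template,))
    return template.replace('{id}', str(bug_id))


# 'bzr commit --fixes lp:12345' records a URL built from the launchpad entry:
assert (unique_tracker_url('https://launchpad.net/bugs/', '12345')
        == 'https://launchpad.net/bugs/12345')
# With 'bugtracker_python_url = http://bugs.python.org/issue{id}' configured,
# '--fixes python:1234' resolves to:
assert (templated_tracker_url('http://bugs.python.org/issue{id}', '1234')
        == 'http://bugs.python.org/issue1234')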
""" def get(self, abbreviation, branch): config = branch.get_config() url = config.get_user_option( "%s_%s_url" % (self.type_name, abbreviation), expand=False) if url is None: return None self._base_url = url return self def __init__(self, type_name, bug_area): self.type_name = type_name self._bug_area = bug_area def _get_bug_url(self, bug_id): """Return a URL for a bug on this Trac instance.""" return urlutils.join(self._base_url, self._bug_area) + str(bug_id) class URLParametrizedIntegerBugTracker(IntegerBugTracker, URLParametrizedBugTracker): """A type of bug tracker that only allows integer bug IDs. This can be found on a variety of different sites, and thus needs to have the base URL configured. Looks for a config setting in the form '__url'. `type_name` is the name of the type of tracker (e.g. 'bugzilla' or 'trac') and `abbreviation` is a short name for the particular instance (e.g. 'squid' or 'apache'). """ tracker_registry.register( 'trac', URLParametrizedIntegerBugTracker('trac', 'ticket/')) tracker_registry.register( 'bugzilla', URLParametrizedIntegerBugTracker('bugzilla', 'show_bug.cgi?id=')) class GenericBugTracker(URLParametrizedBugTracker): """Generic bug tracker specified by an URL template.""" def __init__(self): super(GenericBugTracker, self).__init__('bugtracker', None) def get(self, abbreviation, branch): self._abbreviation = abbreviation return super(GenericBugTracker, self).get(abbreviation, branch) def _get_bug_url(self, bug_id): """Given a validated bug_id, return the bug's web page's URL.""" if '{id}' not in self._base_url: raise errors.InvalidBugTrackerURL(self._abbreviation, self._base_url) return self._base_url.replace('{id}', str(bug_id)) tracker_registry.register('generic', GenericBugTracker()) FIXED = 'fixed' ALLOWED_BUG_STATUSES = set([FIXED]) def encode_fixes_bug_urls(bug_urls): """Get the revision property value for a commit that fixes bugs. :param bug_urls: An iterable of escaped URLs to bugs. These normally come from `get_bug_url`. :return: A string that will be set as the 'bugs' property of a revision as part of a commit. """ return '\n'.join(('%s %s' % (url, FIXED)) for url in bug_urls) bzr-2.7.0/bzrlib/builtins.py0000644000000000000000000102543412147364641014153 0ustar 00000000000000# Copyright (C) 2005-2012 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """builtin bzr commands""" from __future__ import absolute_import import os import bzrlib.bzrdir from bzrlib import lazy_import lazy_import.lazy_import(globals(), """ import cStringIO import errno import sys import time import bzrlib from bzrlib import ( bugtracker, bundle, btree_index, controldir, directory_service, delta, config as _mod_config, errors, globbing, hooks, log, merge as _mod_merge, merge_directive, osutils, reconfigure, rename_map, revision as _mod_revision, static_tuple, timestamp, transport, ui, urlutils, views, gpg, ) from bzrlib.branch import Branch from bzrlib.conflicts import ConflictList from bzrlib.transport import memory from bzrlib.revisionspec import RevisionSpec, RevisionInfo from bzrlib.smtp_connection import SMTPConnection from bzrlib.workingtree import WorkingTree from bzrlib.i18n import gettext, ngettext """) from bzrlib.commands import ( Command, builtin_command_registry, display_command, ) from bzrlib.option import ( ListOption, Option, RegistryOption, custom_help, _parse_revision_str, ) from bzrlib.trace import mutter, note, warning, is_quiet, get_verbosity_level from bzrlib import ( symbol_versioning, ) def _get_branch_location(control_dir, possible_transports=None): """Return location of branch for this control dir.""" try: target = control_dir.get_branch_reference() except errors.NotBranchError: return control_dir.root_transport.base if target is not None: return target this_branch = control_dir.open_branch( possible_transports=possible_transports) # This may be a heavy checkout, where we want the master branch master_location = this_branch.get_bound_location() if master_location is not None: return master_location # If not, use a local sibling return this_branch.base def _is_colocated(control_dir, possible_transports=None): """Check if the branch in control_dir is colocated. :param control_dir: Control directory :return: Tuple with boolean indicating whether the branch is colocated and the full URL to the actual branch """ # This path is meant to be relative to the existing branch this_url = _get_branch_location(control_dir, possible_transports=possible_transports) # Perhaps the target control dir supports colocated branches? try: root = controldir.ControlDir.open(this_url, possible_transports=possible_transports) except errors.NotBranchError: return (False, this_url) else: try: wt = control_dir.open_workingtree() except (errors.NoWorkingTree, errors.NotLocalUrl): return (False, this_url) else: return ( root._format.colocated_branches and control_dir.control_url == root.control_url, this_url) def lookup_new_sibling_branch(control_dir, location, possible_transports=None): """Lookup the location for a new sibling branch. 
:param control_dir: Control directory to find sibling branches from :param location: Name of the new branch :return: Full location to the new branch """ location = directory_service.directories.dereference(location) if '/' not in location and '\\' not in location: (colocated, this_url) = _is_colocated(control_dir, possible_transports) if colocated: return urlutils.join_segment_parameters(this_url, {"branch": urlutils.escape(location)}) else: return urlutils.join(this_url, '..', urlutils.escape(location)) return location def open_sibling_branch(control_dir, location, possible_transports=None): """Open a branch, possibly a sibling of another. :param control_dir: Control directory relative to which to lookup the location. :param location: Location to look up :return: branch to open """ try: # Perhaps it's a colocated branch? return control_dir.open_branch(location, possible_transports=possible_transports) except (errors.NotBranchError, errors.NoColocatedBranchSupport): this_url = _get_branch_location(control_dir) return Branch.open( urlutils.join( this_url, '..', urlutils.escape(location))) def open_nearby_branch(near=None, location=None, possible_transports=None): """Open a nearby branch. :param near: Optional location of container from which to open branch :param location: Location of the branch :return: Branch instance """ if near is None: if location is None: location = "." try: return Branch.open(location, possible_transports=possible_transports) except errors.NotBranchError: near = "." cdir = controldir.ControlDir.open(near, possible_transports=possible_transports) return open_sibling_branch(cdir, location, possible_transports=possible_transports) def iter_sibling_branches(control_dir, possible_transports=None): """Iterate over the siblings of a branch. :param control_dir: Control directory for which to look up the siblings :return: Iterator over tuples with branch name and branch object """ seen_urls = set() try: reference = control_dir.get_branch_reference() except errors.NotBranchError: # There is no active branch, just return the colocated branches. for name, branch in control_dir.get_branches().iteritems(): yield name, branch return if reference is not None: ref_branch = Branch.open(reference, possible_transports=possible_transports) else: ref_branch = None if ref_branch is None or ref_branch.name: if ref_branch is not None: control_dir = ref_branch.bzrdir for name, branch in control_dir.get_branches().iteritems(): yield name, branch else: repo = ref_branch.bzrdir.find_repository() for branch in repo.find_branches(using=True): name = urlutils.relative_url(repo.user_url, branch.user_url).rstrip("/") yield name, branch def tree_files_for_add(file_list): """ Return a tree and list of absolute paths from a file list. Similar to tree_files, but add handles files a bit differently, so it a custom implementation. In particular, MutableTreeTree.smart_add expects absolute paths, which it immediately converts to relative paths. 
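# --- Illustrative sketch (not part of bzrlib): the two shapes of location
# that lookup_new_sibling_branch above produces for a plain branch name.  The
# helper uses simple string handling instead of bzrlib.urlutils, the example
# URLs are hypothetical, and the ',branch=' segment-parameter spelling is an
# assumption standing in for urlutils.join_segment_parameters.

def sibling_location(this_url, name, colocated):
    if colocated:
        # Colocated branches share the control dir and are addressed via a
        # 'branch' segment parameter rather than a sibling directory.
        return '%s,branch=%s' % (this_url, name)
    # Otherwise the new branch becomes a sibling directory of the current one
    # (the real code joins this_url, '..' and the escaped name).
    parent = this_url.rstrip('/').rsplit('/', 1)[0]
    return '%s/%s' % (parent, name)


assert (sibling_location('bzr+ssh://host/repo/trunk', 'feature-x', False)
        == 'bzr+ssh://host/repo/feature-x')
assert (sibling_location('bzr+ssh://host/repo/trunk', 'feature-x', True)
        == 'bzr+ssh://host/repo/trunk,branch=feature-x')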
""" # FIXME Would be nice to just return the relative paths like # internal_tree_files does, but there are a large number of unit tests # that assume the current interface to mutabletree.smart_add if file_list: tree, relpath = WorkingTree.open_containing(file_list[0]) if tree.supports_views(): view_files = tree.views.lookup_view() if view_files: for filename in file_list: if not osutils.is_inside_any(view_files, filename): raise errors.FileOutsideView(filename, view_files) file_list = file_list[:] file_list[0] = tree.abspath(relpath) else: tree = WorkingTree.open_containing(u'.')[0] if tree.supports_views(): view_files = tree.views.lookup_view() if view_files: file_list = view_files view_str = views.view_display_str(view_files) note(gettext("Ignoring files outside view. View is %s") % view_str) return tree, file_list def _get_one_revision(command_name, revisions): if revisions is None: return None if len(revisions) != 1: raise errors.BzrCommandError(gettext( 'bzr %s --revision takes exactly one revision identifier') % ( command_name,)) return revisions[0] def _get_one_revision_tree(command_name, revisions, branch=None, tree=None): """Get a revision tree. Not suitable for commands that change the tree. Specifically, the basis tree in dirstate trees is coupled to the dirstate and doing a commit/uncommit/pull will at best fail due to changing the basis revision data. If tree is passed in, it should be already locked, for lifetime management of the trees internal cached state. """ if branch is None: branch = tree.branch if revisions is None: if tree is not None: rev_tree = tree.basis_tree() else: rev_tree = branch.basis_tree() else: revision = _get_one_revision(command_name, revisions) rev_tree = revision.as_tree(branch) return rev_tree def _get_view_info_for_change_reporter(tree): """Get the view information from a tree for change reporting.""" view_info = None try: current_view = tree.views.get_view_info()[0] if current_view is not None: view_info = (current_view, tree.views.lookup_view()) except errors.ViewsNotSupported: pass return view_info def _open_directory_or_containing_tree_or_branch(filename, directory): """Open the tree or branch containing the specified file, unless the --directory option is used to specify a different branch.""" if directory is not None: return (None, Branch.open(directory), filename) return controldir.ControlDir.open_containing_tree_or_branch(filename) # TODO: Make sure no commands unconditionally use the working directory as a # branch. If a filename argument is used, the first of them should be used to # specify the branch. (Perhaps this can be factored out into some kind of # Argument class, representing a file in a branch, where the first occurrence # opens the branch?) class cmd_status(Command): __doc__ = """Display status summary. This reports on versioned and unknown files, reporting them grouped by state. Possible states are: added Versioned in the working copy but not in the previous revision. removed Versioned in the previous revision but removed or deleted in the working copy. renamed Path of this file changed from the previous revision; the text may also have changed. This includes files whose parent directory was renamed. modified Text has changed since the previous revision. kind changed File kind has been changed (e.g. from file to directory). unknown Not versioned and not matching an ignore pattern. 
Additionally for directories, symlinks and files with a changed executable bit, Bazaar indicates their type using a trailing character: '/', '@' or '*' respectively. These decorations can be disabled using the '--no-classify' option. To see ignored files use 'bzr ignored'. For details on the changes to file texts, use 'bzr diff'. Note that --short or -S gives status flags for each item, similar to Subversion's status command. To get output similar to svn -q, use bzr status -SV. If no arguments are specified, the status of the entire working directory is shown. Otherwise, only the status of the specified files or directories is reported. If a directory is given, status is reported for everything inside that directory. Before merges are committed, the pending merge tip revisions are shown. To see all pending merge revisions, use the -v option. To skip the display of pending merge information altogether, use the no-pending option or specify a file/directory. To compare the working directory to a specific revision, pass a single revision to the revision argument. To see which files have changed in a specific revision, or between two revisions, pass a revision range to the revision argument. This will produce the same results as calling 'bzr diff --summarize'. """ # TODO: --no-recurse/-N, --recurse options takes_args = ['file*'] takes_options = ['show-ids', 'revision', 'change', 'verbose', Option('short', help='Use short status indicators.', short_name='S'), Option('versioned', help='Only show versioned files.', short_name='V'), Option('no-pending', help='Don\'t show pending merges.', ), Option('no-classify', help='Do not mark object type using indicator.', ), ] aliases = ['st', 'stat'] encoding_type = 'replace' _see_also = ['diff', 'revert', 'status-flags'] @display_command def run(self, show_ids=False, file_list=None, revision=None, short=False, versioned=False, no_pending=False, verbose=False, no_classify=False): from bzrlib.status import show_tree_status if revision and len(revision) > 2: raise errors.BzrCommandError(gettext('bzr status --revision takes exactly' ' one or two revision specifiers')) tree, relfile_list = WorkingTree.open_containing_paths(file_list) # Avoid asking for specific files when that is not needed. if relfile_list == ['']: relfile_list = None # Don't disable pending merges for full trees other than '.'. if file_list == ['.']: no_pending = True # A specific path within a tree was given. elif relfile_list is not None: no_pending = True show_tree_status(tree, show_ids=show_ids, specific_files=relfile_list, revision=revision, to_file=self.outf, short=short, versioned=versioned, show_pending=(not no_pending), verbose=verbose, classify=not no_classify) class cmd_cat_revision(Command): __doc__ = """Write out metadata for a revision. The revision to print can either be specified by a specific revision identifier, or you can use --revision. 
""" hidden = True takes_args = ['revision_id?'] takes_options = ['directory', 'revision'] # cat-revision is more for frontends so should be exact encoding = 'strict' def print_revision(self, revisions, revid): stream = revisions.get_record_stream([(revid,)], 'unordered', True) record = stream.next() if record.storage_kind == 'absent': raise errors.NoSuchRevision(revisions, revid) revtext = record.get_bytes_as('fulltext') self.outf.write(revtext.decode('utf-8')) @display_command def run(self, revision_id=None, revision=None, directory=u'.'): if revision_id is not None and revision is not None: raise errors.BzrCommandError(gettext('You can only supply one of' ' revision_id or --revision')) if revision_id is None and revision is None: raise errors.BzrCommandError(gettext('You must supply either' ' --revision or a revision_id')) b = controldir.ControlDir.open_containing_tree_or_branch(directory)[1] revisions = b.repository.revisions if revisions is None: raise errors.BzrCommandError(gettext('Repository %r does not support ' 'access to raw revision texts')) b.repository.lock_read() try: # TODO: jam 20060112 should cat-revision always output utf-8? if revision_id is not None: revision_id = osutils.safe_revision_id(revision_id, warn=False) try: self.print_revision(revisions, revision_id) except errors.NoSuchRevision: msg = gettext("The repository {0} contains no revision {1}.").format( b.repository.base, revision_id) raise errors.BzrCommandError(msg) elif revision is not None: for rev in revision: if rev is None: raise errors.BzrCommandError( gettext('You cannot specify a NULL revision.')) rev_id = rev.as_revision_id(b) self.print_revision(revisions, rev_id) finally: b.repository.unlock() class cmd_dump_btree(Command): __doc__ = """Dump the contents of a btree index file to stdout. PATH is a btree index file, it can be any URL. This includes things like .bzr/repository/pack-names, or .bzr/repository/indices/a34b3a...ca4a4.iix By default, the tuples stored in the index file will be displayed. With --raw, we will uncompress the pages, but otherwise display the raw bytes stored in the index. """ # TODO: Do we want to dump the internal nodes as well? # TODO: It would be nice to be able to dump the un-parsed information, # rather than only going through iter_all_entries. However, this is # good enough for a start hidden = True encoding_type = 'exact' takes_args = ['path'] takes_options = [Option('raw', help='Write the uncompressed bytes out,' ' rather than the parsed tuples.'), ] def run(self, path, raw=False): dirname, basename = osutils.split(path) t = transport.get_transport(dirname) if raw: self._dump_raw_bytes(t, basename) else: self._dump_entries(t, basename) def _get_index_and_bytes(self, trans, basename): """Create a BTreeGraphIndex and raw bytes.""" bt = btree_index.BTreeGraphIndex(trans, basename, None) bytes = trans.get_bytes(basename) bt._file = cStringIO.StringIO(bytes) bt._size = len(bytes) return bt, bytes def _dump_raw_bytes(self, trans, basename): import zlib # We need to parse at least the root node. # This is because the first page of every row starts with an # uncompressed header. 
bt, bytes = self._get_index_and_bytes(trans, basename) for page_idx, page_start in enumerate(xrange(0, len(bytes), btree_index._PAGE_SIZE)): page_end = min(page_start + btree_index._PAGE_SIZE, len(bytes)) page_bytes = bytes[page_start:page_end] if page_idx == 0: self.outf.write('Root node:\n') header_end, data = bt._parse_header_from_bytes(page_bytes) self.outf.write(page_bytes[:header_end]) page_bytes = data self.outf.write('\nPage %d\n' % (page_idx,)) if len(page_bytes) == 0: self.outf.write('(empty)\n'); else: decomp_bytes = zlib.decompress(page_bytes) self.outf.write(decomp_bytes) self.outf.write('\n') def _dump_entries(self, trans, basename): try: st = trans.stat(basename) except errors.TransportNotPossible: # We can't stat, so we'll fake it because we have to do the 'get()' # anyway. bt, _ = self._get_index_and_bytes(trans, basename) else: bt = btree_index.BTreeGraphIndex(trans, basename, st.st_size) for node in bt.iter_all_entries(): # Node is made up of: # (index, key, value, [references]) try: refs = node[3] except IndexError: refs_as_tuples = None else: refs_as_tuples = static_tuple.as_tuples(refs) as_tuple = (tuple(node[1]), node[2], refs_as_tuples) self.outf.write('%s\n' % (as_tuple,)) class cmd_remove_tree(Command): __doc__ = """Remove the working tree from a given branch/checkout. Since a lightweight checkout is little more than a working tree this will refuse to run against one. To re-create the working tree, use "bzr checkout". """ _see_also = ['checkout', 'working-trees'] takes_args = ['location*'] takes_options = [ Option('force', help='Remove the working tree even if it has ' 'uncommitted or shelved changes.'), ] def run(self, location_list, force=False): if not location_list: location_list=['.'] for location in location_list: d = controldir.ControlDir.open(location) try: working = d.open_workingtree() except errors.NoWorkingTree: raise errors.BzrCommandError(gettext("No working tree to remove")) except errors.NotLocalUrl: raise errors.BzrCommandError(gettext("You cannot remove the working tree" " of a remote path")) if not force: if (working.has_changes()): raise errors.UncommittedChanges(working) if working.get_shelf_manager().last_shelf() is not None: raise errors.ShelvedChanges(working) if working.user_url != working.branch.user_url: raise errors.BzrCommandError(gettext("You cannot remove the working tree" " from a lightweight checkout")) d.destroy_workingtree() class cmd_repair_workingtree(Command): __doc__ = """Reset the working tree state file. This is not meant to be used normally, but more as a way to recover from filesystem corruption, etc. This rebuilds the working inventory back to a 'known good' state. Any new modifications (adding a file, renaming, etc) will be lost, though modified files will still be detected as such. Most users will want something more like "bzr revert" or "bzr update" unless the state file has become corrupted. By default this attempts to recover the current state by looking at the headers of the state file. If the state file is too corrupted to even do that, you can supply --revision to force the state of the tree. 
""" takes_options = ['revision', 'directory', Option('force', help='Reset the tree even if it doesn\'t appear to be' ' corrupted.'), ] hidden = True def run(self, revision=None, directory='.', force=False): tree, _ = WorkingTree.open_containing(directory) self.add_cleanup(tree.lock_tree_write().unlock) if not force: try: tree.check_state() except errors.BzrError: pass # There seems to be a real error here, so we'll reset else: # Refuse raise errors.BzrCommandError(gettext( 'The tree does not appear to be corrupt. You probably' ' want "bzr revert" instead. Use "--force" if you are' ' sure you want to reset the working tree.')) if revision is None: revision_ids = None else: revision_ids = [r.as_revision_id(tree.branch) for r in revision] try: tree.reset_state(revision_ids) except errors.BzrError, e: if revision_ids is None: extra = (gettext(', the header appears corrupt, try passing -r -1' ' to set the state to the last commit')) else: extra = '' raise errors.BzrCommandError(gettext('failed to reset the tree state{0}').format(extra)) class cmd_revno(Command): __doc__ = """Show current revision number. This is equal to the number of revisions on this branch. """ _see_also = ['info'] takes_args = ['location?'] takes_options = [ Option('tree', help='Show revno of working tree.'), 'revision', ] @display_command def run(self, tree=False, location=u'.', revision=None): if revision is not None and tree: raise errors.BzrCommandError(gettext("--tree and --revision can " "not be used together")) if tree: try: wt = WorkingTree.open_containing(location)[0] self.add_cleanup(wt.lock_read().unlock) except (errors.NoWorkingTree, errors.NotLocalUrl): raise errors.NoWorkingTree(location) b = wt.branch revid = wt.last_revision() else: b = Branch.open_containing(location)[0] self.add_cleanup(b.lock_read().unlock) if revision: if len(revision) != 1: raise errors.BzrCommandError(gettext( "Revision numbers only make sense for single " "revisions, not ranges")) revid = revision[0].as_revision_id(b) else: revid = b.last_revision() try: revno_t = b.revision_id_to_dotted_revno(revid) except errors.NoSuchRevision: revno_t = ('???',) revno = ".".join(str(n) for n in revno_t) self.cleanup_now() self.outf.write(revno + '\n') class cmd_revision_info(Command): __doc__ = """Show revision number and revision id for a given revision identifier. 
""" hidden = True takes_args = ['revision_info*'] takes_options = [ 'revision', custom_help('directory', help='Branch to examine, ' 'rather than the one containing the working directory.'), Option('tree', help='Show revno of working tree.'), ] @display_command def run(self, revision=None, directory=u'.', tree=False, revision_info_list=[]): try: wt = WorkingTree.open_containing(directory)[0] b = wt.branch self.add_cleanup(wt.lock_read().unlock) except (errors.NoWorkingTree, errors.NotLocalUrl): wt = None b = Branch.open_containing(directory)[0] self.add_cleanup(b.lock_read().unlock) revision_ids = [] if revision is not None: revision_ids.extend(rev.as_revision_id(b) for rev in revision) if revision_info_list is not None: for rev_str in revision_info_list: rev_spec = RevisionSpec.from_string(rev_str) revision_ids.append(rev_spec.as_revision_id(b)) # No arguments supplied, default to the last revision if len(revision_ids) == 0: if tree: if wt is None: raise errors.NoWorkingTree(directory) revision_ids.append(wt.last_revision()) else: revision_ids.append(b.last_revision()) revinfos = [] maxlen = 0 for revision_id in revision_ids: try: dotted_revno = b.revision_id_to_dotted_revno(revision_id) revno = '.'.join(str(i) for i in dotted_revno) except errors.NoSuchRevision: revno = '???' maxlen = max(maxlen, len(revno)) revinfos.append([revno, revision_id]) self.cleanup_now() for ri in revinfos: self.outf.write('%*s %s\n' % (maxlen, ri[0], ri[1])) class cmd_add(Command): __doc__ = """Add specified files or directories. In non-recursive mode, all the named items are added, regardless of whether they were previously ignored. A warning is given if any of the named files are already versioned. In recursive mode (the default), files are treated the same way but the behaviour for directories is different. Directories that are already versioned do not give a warning. All directories, whether already versioned or not, are searched for files or subdirectories that are neither versioned or ignored, and these are added. This search proceeds recursively into versioned directories. If no names are given '.' is assumed. A warning will be printed when nested trees are encountered, unless they are explicitly ignored. Therefore simply saying 'bzr add' will version all files that are currently unknown. Adding a file whose parent directory is not versioned will implicitly add the parent, and so on up to the root. This means you should never need to explicitly add a directory, they'll just get added when you add a file in the directory. --dry-run will show which files would be added, but not actually add them. --file-ids-from will try to use the file ids from the supplied path. It looks up ids trying to find a matching parent directory with the same filename, and then by pure path. This option is rarely needed but can be useful when adding the same logical file into two branches that will be merged later (without showing the two different adds as a conflict). It is also useful when merging another project into a subdirectory of this one. Any files matching patterns in the ignore list will not be added unless they are explicitly mentioned. In recursive mode, files larger than the configuration option add.maximum_file_size will be skipped. Named items are never skipped due to file size. 
""" takes_args = ['file*'] takes_options = [ Option('no-recurse', help="Don't recursively add the contents of directories.", short_name='N'), Option('dry-run', help="Show what would be done, but don't actually do anything."), 'verbose', Option('file-ids-from', type=unicode, help='Lookup file ids from this tree.'), ] encoding_type = 'replace' _see_also = ['remove', 'ignore'] def run(self, file_list, no_recurse=False, dry_run=False, verbose=False, file_ids_from=None): import bzrlib.add base_tree = None if file_ids_from is not None: try: base_tree, base_path = WorkingTree.open_containing( file_ids_from) except errors.NoWorkingTree: base_branch, base_path = Branch.open_containing( file_ids_from) base_tree = base_branch.basis_tree() action = bzrlib.add.AddFromBaseAction(base_tree, base_path, to_file=self.outf, should_print=(not is_quiet())) else: action = bzrlib.add.AddWithSkipLargeAction(to_file=self.outf, should_print=(not is_quiet())) if base_tree: self.add_cleanup(base_tree.lock_read().unlock) tree, file_list = tree_files_for_add(file_list) added, ignored = tree.smart_add(file_list, not no_recurse, action=action, save=not dry_run) self.cleanup_now() if len(ignored) > 0: if verbose: for glob in sorted(ignored.keys()): for path in ignored[glob]: self.outf.write( gettext("ignored {0} matching \"{1}\"\n").format( path, glob)) class cmd_mkdir(Command): __doc__ = """Create a new versioned directory. This is equivalent to creating the directory and then adding it. """ takes_args = ['dir+'] takes_options = [ Option( 'parents', help='No error if existing, make parent directories as needed.', short_name='p' ) ] encoding_type = 'replace' @classmethod def add_file_with_parents(cls, wt, relpath): if wt.path2id(relpath) is not None: return cls.add_file_with_parents(wt, osutils.dirname(relpath)) wt.add([relpath]) @classmethod def add_file_single(cls, wt, relpath): wt.add([relpath]) def run(self, dir_list, parents=False): if parents: add_file = self.add_file_with_parents else: add_file = self.add_file_single for dir in dir_list: wt, relpath = WorkingTree.open_containing(dir) if parents: try: os.makedirs(dir) except OSError, e: if e.errno != errno.EEXIST: raise else: os.mkdir(dir) add_file(wt, relpath) if not is_quiet(): self.outf.write(gettext('added %s\n') % dir) class cmd_relpath(Command): __doc__ = """Show path of a file relative to root""" takes_args = ['filename'] hidden = True @display_command def run(self, filename): # TODO: jam 20050106 Can relpath return a munged path if # sys.stdout encoding cannot represent it? tree, relpath = WorkingTree.open_containing(filename) self.outf.write(relpath) self.outf.write('\n') class cmd_inventory(Command): __doc__ = """Show inventory of the current working copy or a revision. It is possible to limit the output to a particular entry type using the --kind option. For example: --kind file. It is also possible to restrict the list of files to a specific set. 
For example: bzr inventory --show-ids this/file """ hidden = True _see_also = ['ls'] takes_options = [ 'revision', 'show-ids', Option('kind', help='List entries of a particular kind: file, directory, symlink.', type=unicode), ] takes_args = ['file*'] @display_command def run(self, revision=None, show_ids=False, kind=None, file_list=None): if kind and kind not in ['file', 'directory', 'symlink']: raise errors.BzrCommandError(gettext('invalid kind %r specified') % (kind,)) revision = _get_one_revision('inventory', revision) work_tree, file_list = WorkingTree.open_containing_paths(file_list) self.add_cleanup(work_tree.lock_read().unlock) if revision is not None: tree = revision.as_tree(work_tree.branch) extra_trees = [work_tree] self.add_cleanup(tree.lock_read().unlock) else: tree = work_tree extra_trees = [] self.add_cleanup(tree.lock_read().unlock) if file_list is not None: file_ids = tree.paths2ids(file_list, trees=extra_trees, require_versioned=True) # find_ids_across_trees may include some paths that don't # exist in 'tree'. entries = tree.iter_entries_by_dir(specific_file_ids=file_ids) else: entries = tree.iter_entries_by_dir() for path, entry in sorted(entries): if kind and kind != entry.kind: continue if path == "": continue if show_ids: self.outf.write('%-50s %s\n' % (path, entry.file_id)) else: self.outf.write(path) self.outf.write('\n') class cmd_mv(Command): __doc__ = """Move or rename a file. :Usage: bzr mv OLDNAME NEWNAME bzr mv SOURCE... DESTINATION If the last argument is a versioned directory, all the other names are moved into it. Otherwise, there must be exactly two arguments and the file is changed to a new name. If OLDNAME does not exist on the filesystem but is versioned and NEWNAME does exist on the filesystem but is not versioned, mv assumes that the file has been manually moved and only updates its internal inventory to reflect that change. The same is valid when moving many SOURCE files to a DESTINATION. Files cannot be moved between branches. 
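# --- Illustrative sketch, not part of bzrlib: listing entries of one kind
# from the current working tree, filtering iter_entries_by_dir() by
# entry.kind exactly as cmd_inventory.run() above does.  The name
# _example_list_kind is invented for this example.
def _example_list_kind(kind='symlink'):
    import sys
    from bzrlib.workingtree import WorkingTree
    tree = WorkingTree.open_containing(u'.')[0]
    tree.lock_read()
    try:
        for path, entry in sorted(tree.iter_entries_by_dir()):
            if path == "" or entry.kind != kind:
                continue
            sys.stdout.write(path + '\n')
    finally:
        tree.unlock()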
""" takes_args = ['names*'] takes_options = [Option("after", help="Move only the bzr identifier" " of the file, because the file has already been moved."), Option('auto', help='Automatically guess renames.'), Option('dry-run', help='Avoid making changes when guessing renames.'), ] aliases = ['move', 'rename'] encoding_type = 'replace' def run(self, names_list, after=False, auto=False, dry_run=False): if auto: return self.run_auto(names_list, after, dry_run) elif dry_run: raise errors.BzrCommandError(gettext('--dry-run requires --auto.')) if names_list is None: names_list = [] if len(names_list) < 2: raise errors.BzrCommandError(gettext("missing file argument")) tree, rel_names = WorkingTree.open_containing_paths(names_list, canonicalize=False) for file_name in rel_names[0:-1]: if file_name == '': raise errors.BzrCommandError(gettext("can not move root of branch")) self.add_cleanup(tree.lock_tree_write().unlock) self._run(tree, names_list, rel_names, after) def run_auto(self, names_list, after, dry_run): if names_list is not None and len(names_list) > 1: raise errors.BzrCommandError(gettext('Only one path may be specified to' ' --auto.')) if after: raise errors.BzrCommandError(gettext('--after cannot be specified with' ' --auto.')) work_tree, file_list = WorkingTree.open_containing_paths( names_list, default_directory='.') self.add_cleanup(work_tree.lock_tree_write().unlock) rename_map.RenameMap.guess_renames(work_tree, dry_run) def _run(self, tree, names_list, rel_names, after): into_existing = osutils.isdir(names_list[-1]) if into_existing and len(names_list) == 2: # special cases: # a. case-insensitive filesystem and change case of dir # b. move directory after the fact (if the source used to be # a directory, but now doesn't exist in the working tree # and the target is an existing directory, just rename it) if (not tree.case_sensitive and rel_names[0].lower() == rel_names[1].lower()): into_existing = False else: # 'fix' the case of a potential 'from' from_id = tree.path2id( tree.get_canonical_inventory_path(rel_names[0])) if (not osutils.lexists(names_list[0]) and from_id and tree.stored_kind(from_id) == "directory"): into_existing = False # move/rename if into_existing: # move into existing directory # All entries reference existing inventory items, so fix them up # for cicp file-systems. rel_names = tree.get_canonical_inventory_paths(rel_names) for src, dest in tree.move(rel_names[:-1], rel_names[-1], after=after): if not is_quiet(): self.outf.write("%s => %s\n" % (src, dest)) else: if len(names_list) != 2: raise errors.BzrCommandError(gettext('to mv multiple files the' ' destination must be a versioned' ' directory')) # for cicp file-systems: the src references an existing inventory # item: src = tree.get_canonical_inventory_path(rel_names[0]) # Find the canonical version of the destination: In all cases, the # parent of the target must be in the inventory, so we fetch the # canonical version from there (we do not always *use* the # canonicalized tail portion - we may be attempting to rename the # case of the tail) canon_dest = tree.get_canonical_inventory_path(rel_names[1]) dest_parent = osutils.dirname(canon_dest) spec_tail = osutils.basename(rel_names[1]) # For a CICP file-system, we need to avoid creating 2 inventory # entries that differ only by case. So regardless of the case # we *want* to use (ie, specified by the user or the file-system), # we must always choose to use the case of any existing inventory # items. 
The only exception to this is when we are attempting a # case-only rename (ie, canonical versions of src and dest are # the same) dest_id = tree.path2id(canon_dest) if dest_id is None or tree.path2id(src) == dest_id: # No existing item we care about, so work out what case we # are actually going to use. if after: # If 'after' is specified, the tail must refer to a file on disk. if dest_parent: dest_parent_fq = osutils.pathjoin(tree.basedir, dest_parent) else: # pathjoin with an empty tail adds a slash, which breaks # relpath :( dest_parent_fq = tree.basedir dest_tail = osutils.canonical_relpath( dest_parent_fq, osutils.pathjoin(dest_parent_fq, spec_tail)) else: # not 'after', so case as specified is used dest_tail = spec_tail else: # Use the existing item so 'mv' fails with AlreadyVersioned. dest_tail = os.path.basename(canon_dest) dest = osutils.pathjoin(dest_parent, dest_tail) mutter("attempting to move %s => %s", src, dest) tree.rename_one(src, dest, after=after) if not is_quiet(): self.outf.write("%s => %s\n" % (src, dest)) class cmd_pull(Command): __doc__ = """Turn this branch into a mirror of another branch. By default, this command only works on branches that have not diverged. Branches are considered diverged if the destination branch's most recent commit is one that has not been merged (directly or indirectly) into the parent. If branches have diverged, you can use 'bzr merge' to integrate the changes from one into the other. Once one branch has merged, the other should be able to pull it again. If you want to replace your local changes and just want your branch to match the remote one, use pull --overwrite. This will work even if the two branches have diverged. If there is no default location set, the first pull will set it (use --no-remember to avoid setting it). After that, you can omit the location to use the default. To change the default, use --remember. The value will only be saved if the remote location can be accessed. The --verbose option will display the revisions pulled using the log_format configuration option. You can use a different format by overriding it with -Olog_format=. Note: The location can be specified either in the form of a branch, or in the form of a path to a file containing a merge directive generated with bzr send. """ _see_also = ['push', 'update', 'status-flags', 'send'] takes_options = ['remember', 'overwrite', 'revision', custom_help('verbose', help='Show logs of pulled revisions.'), custom_help('directory', help='Branch to pull into, ' 'rather than the one containing the working directory.'), Option('local', help="Perform a local pull in a bound " "branch. Local pulls are not applied to " "the master branch." ), Option('show-base', help="Show base revision text in conflicts."), Option('overwrite-tags', help="Overwrite tags only."), ] takes_args = ['location?'] encoding_type = 'replace' def run(self, location=None, remember=None, overwrite=False, revision=None, verbose=False, directory=None, local=False, show_base=False, overwrite_tags=False): if overwrite: overwrite = ["history", "tags"] elif overwrite_tags: overwrite = ["tags"] else: overwrite = [] # FIXME: too much stuff is in the command class revision_id = None mergeable = None if directory is None: directory = u'.' 
try: tree_to = WorkingTree.open_containing(directory)[0] branch_to = tree_to.branch self.add_cleanup(tree_to.lock_write().unlock) except errors.NoWorkingTree: tree_to = None branch_to = Branch.open_containing(directory)[0] self.add_cleanup(branch_to.lock_write().unlock) if show_base: warning(gettext("No working tree, ignoring --show-base")) if local and not branch_to.get_bound_location(): raise errors.LocalRequiresBoundBranch() possible_transports = [] if location is not None: try: mergeable = bundle.read_mergeable_from_url(location, possible_transports=possible_transports) except errors.NotABundle: mergeable = None stored_loc = branch_to.get_parent() if location is None: if stored_loc is None: raise errors.BzrCommandError(gettext("No pull location known or" " specified.")) else: display_url = urlutils.unescape_for_display(stored_loc, self.outf.encoding) if not is_quiet(): self.outf.write(gettext("Using saved parent location: %s\n") % display_url) location = stored_loc revision = _get_one_revision('pull', revision) if mergeable is not None: if revision is not None: raise errors.BzrCommandError(gettext( 'Cannot use -r with merge directives or bundles')) mergeable.install_revisions(branch_to.repository) base_revision_id, revision_id, verified = \ mergeable.get_merge_request(branch_to.repository) branch_from = branch_to else: branch_from = Branch.open(location, possible_transports=possible_transports) self.add_cleanup(branch_from.lock_read().unlock) # Remembers if asked explicitly or no previous location is set if (remember or (remember is None and branch_to.get_parent() is None)): # FIXME: This shouldn't be done before the pull # succeeds... -- vila 2012-01-02 branch_to.set_parent(branch_from.base) if revision is not None: revision_id = revision.as_revision_id(branch_from) if tree_to is not None: view_info = _get_view_info_for_change_reporter(tree_to) change_reporter = delta._ChangeReporter( unversioned_filter=tree_to.is_ignored, view_info=view_info) result = tree_to.pull( branch_from, overwrite, revision_id, change_reporter, local=local, show_base=show_base) else: result = branch_to.pull( branch_from, overwrite, revision_id, local=local) result.report(self.outf) if verbose and result.old_revid != result.new_revid: log.show_branch_change( branch_to, self.outf, result.old_revno, result.old_revid) if getattr(result, 'tag_conflicts', None): return 1 else: return 0 class cmd_push(Command): __doc__ = """Update a mirror of this branch. The target branch will not have its working tree populated because this is both expensive, and is not supported on remote file systems. Some smart servers or protocols *may* put the working tree in place in the future. This command only works on branches that have not diverged. Branches are considered diverged if the destination branch's most recent commit is one that has not been merged (directly or indirectly) by the source branch. If branches have diverged, you can use 'bzr push --overwrite' to replace the other branch completely, discarding its unmerged changes. If you want to ensure you have the different changes in the other branch, do a merge (see bzr help merge) from the other branch, and commit that. After that you will be able to do a push without '--overwrite'. If there is no default push location set, the first push will set it (use --no-remember to avoid setting it). After that, you can omit the location to use the default. To change the default, use --remember. The value will only be saved if the remote location can be accessed. 
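# --- Illustrative sketch, not part of bzrlib: a minimal, non-overwriting pull
# into the working tree in the current directory, mirroring the
# tree_to.pull() call in cmd_pull.run() above.  The name _example_pull is
# invented for this example; error handling and location remembering are
# omitted.
def _example_pull(location):
    import sys
    from bzrlib.branch import Branch
    from bzrlib.workingtree import WorkingTree
    tree_to = WorkingTree.open_containing(u'.')[0]
    tree_to.lock_write()
    try:
        branch_from = Branch.open(location)
        branch_from.lock_read()
        try:
            # overwrite=[] gives the ordinary (non --overwrite) behaviour;
            # None means "pull up to the source branch tip".
            result = tree_to.pull(branch_from, [], None)
            result.report(sys.stdout)
        finally:
            branch_from.unlock()
    finally:
        tree_to.unlock()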
The --verbose option will display the revisions pushed using the log_format configuration option. You can use a different format by overriding it with -Olog_format=. """ _see_also = ['pull', 'update', 'working-trees'] takes_options = ['remember', 'overwrite', 'verbose', 'revision', Option('create-prefix', help='Create the path leading up to the branch ' 'if it does not already exist.'), custom_help('directory', help='Branch to push from, ' 'rather than the one containing the working directory.'), Option('use-existing-dir', help='By default push will fail if the target' ' directory exists, but does not already' ' have a control directory. This flag will' ' allow push to proceed.'), Option('stacked', help='Create a stacked branch that references the public location ' 'of the parent branch.'), Option('stacked-on', help='Create a stacked branch that refers to another branch ' 'for the commit history. Only the work not present in the ' 'referenced branch is included in the branch created.', type=unicode), Option('strict', help='Refuse to push if there are uncommitted changes in' ' the working tree, --no-strict disables the check.'), Option('no-tree', help="Don't populate the working tree, even for protocols" " that support it."), Option('overwrite-tags', help="Overwrite tags only."), ] takes_args = ['location?'] encoding_type = 'replace' def run(self, location=None, remember=None, overwrite=False, create_prefix=False, verbose=False, revision=None, use_existing_dir=False, directory=None, stacked_on=None, stacked=False, strict=None, no_tree=False, overwrite_tags=False): from bzrlib.push import _show_push_branch if overwrite: overwrite = ["history", "tags"] elif overwrite_tags: overwrite = ["tags"] else: overwrite = [] if directory is None: directory = '.' # Get the source branch (tree, br_from, _unused) = controldir.ControlDir.open_containing_tree_or_branch(directory) # Get the tip's revision_id revision = _get_one_revision('push', revision) if revision is not None: revision_id = revision.in_history(br_from).rev_id else: revision_id = None if tree is not None and revision_id is None: tree.check_changed_or_out_of_date( strict, 'push_strict', more_error='Use --no-strict to force the push.', more_warning='Uncommitted changes will not be pushed.') # Get the stacked_on branch, if any if stacked_on is not None: stacked_on = urlutils.normalize_url(stacked_on) elif stacked: parent_url = br_from.get_parent() if parent_url: parent = Branch.open(parent_url) stacked_on = parent.get_public_branch() if not stacked_on: # I considered excluding non-http url's here, thus forcing # 'public' branches only, but that only works for some # users, so it's best to just depend on the user spotting an # error by the feedback given to them. RBC 20080227. stacked_on = parent_url if not stacked_on: raise errors.BzrCommandError(gettext( "Could not determine branch to refer to.")) # Get the destination location if location is None: stored_loc = br_from.get_push_location() if stored_loc is None: parent_loc = br_from.get_parent() if parent_loc: raise errors.BzrCommandError(gettext( "No push location known or specified. To push to the " "parent branch (at %s), use 'bzr push :parent'." 
% urlutils.unescape_for_display(parent_loc, self.outf.encoding))) else: raise errors.BzrCommandError(gettext( "No push location known or specified.")) else: display_url = urlutils.unescape_for_display(stored_loc, self.outf.encoding) note(gettext("Using saved push location: %s") % display_url) location = stored_loc _show_push_branch(br_from, revision_id, location, self.outf, verbose=verbose, overwrite=overwrite, remember=remember, stacked_on=stacked_on, create_prefix=create_prefix, use_existing_dir=use_existing_dir, no_tree=no_tree) class cmd_branch(Command): __doc__ = """Create a new branch that is a copy of an existing branch. If the TO_LOCATION is omitted, the last component of the FROM_LOCATION will be used. In other words, "branch ../foo/bar" will attempt to create ./bar. If the FROM_LOCATION has no / or path separator embedded, the TO_LOCATION is derived from the FROM_LOCATION by stripping a leading scheme or drive identifier, if any. For example, "branch lp:foo-bar" will attempt to create ./foo-bar. To retrieve the branch as of a particular revision, supply the --revision parameter, as in "branch foo/bar -r 5". The synonyms 'clone' and 'get' for this command are deprecated. """ _see_also = ['checkout'] takes_args = ['from_location', 'to_location?'] takes_options = ['revision', Option('hardlink', help='Hard-link working tree files where possible.'), Option('files-from', type=str, help="Get file contents from this tree."), Option('no-tree', help="Create a branch without a working-tree."), Option('switch', help="Switch the checkout in the current directory " "to the new branch."), Option('stacked', help='Create a stacked branch referring to the source branch. ' 'The new branch will depend on the availability of the source ' 'branch for all operations.'), Option('standalone', help='Do not use a shared repository, even if available.'), Option('use-existing-dir', help='By default branch will fail if the target' ' directory exists, but does not already' ' have a control directory. 
This flag will' ' allow branch to proceed.'), Option('bind', help="Bind new branch to from location."), ] aliases = ['get', 'clone'] def run(self, from_location, to_location=None, revision=None, hardlink=False, stacked=False, standalone=False, no_tree=False, use_existing_dir=False, switch=False, bind=False, files_from=None): from bzrlib import switch as _mod_switch from bzrlib.tag import _merge_tags_if_possible if self.invoked_as in ['get', 'clone']: ui.ui_factory.show_user_warning( 'deprecated_command', deprecated_name=self.invoked_as, recommended_name='branch', deprecated_in_version='2.4') accelerator_tree, br_from = controldir.ControlDir.open_tree_or_branch( from_location) if not (hardlink or files_from): # accelerator_tree is usually slower because you have to read N # files (no readahead, lots of seeks, etc), but allow the user to # explicitly request it accelerator_tree = None if files_from is not None and files_from != from_location: accelerator_tree = WorkingTree.open(files_from) revision = _get_one_revision('branch', revision) self.add_cleanup(br_from.lock_read().unlock) if revision is not None: revision_id = revision.as_revision_id(br_from) else: # FIXME - wt.last_revision, fallback to branch, fall back to # None or perhaps NULL_REVISION to mean copy nothing # RBC 20060209 revision_id = br_from.last_revision() if to_location is None: to_location = getattr(br_from, "name", None) if not to_location: to_location = urlutils.derive_to_location(from_location) to_transport = transport.get_transport(to_location) try: to_transport.mkdir('.') except errors.FileExists: try: to_dir = controldir.ControlDir.open_from_transport( to_transport) except errors.NotBranchError: if not use_existing_dir: raise errors.BzrCommandError(gettext('Target directory "%s" ' 'already exists.') % to_location) else: to_dir = None else: try: to_dir.open_branch() except errors.NotBranchError: pass else: raise errors.AlreadyBranchError(to_location) except errors.NoSuchFile: raise errors.BzrCommandError(gettext('Parent of "%s" does not exist.') % to_location) else: to_dir = None if to_dir is None: try: # preserve whatever source format we have. to_dir = br_from.bzrdir.sprout(to_transport.base, revision_id, possible_transports=[to_transport], accelerator_tree=accelerator_tree, hardlink=hardlink, stacked=stacked, force_new_repo=standalone, create_tree_if_local=not no_tree, source_branch=br_from) branch = to_dir.open_branch( possible_transports=[ br_from.bzrdir.root_transport, to_transport]) except errors.NoSuchRevision: to_transport.delete_tree('.') msg = gettext("The branch {0} has no revision {1}.").format( from_location, revision) raise errors.BzrCommandError(msg) else: try: to_repo = to_dir.open_repository() except errors.NoRepositoryPresent: to_repo = to_dir.create_repository() to_repo.fetch(br_from.repository, revision_id=revision_id) branch = br_from.sprout(to_dir, revision_id=revision_id) _merge_tags_if_possible(br_from, branch) # If the source branch is stacked, the new branch may # be stacked whether we asked for that explicitly or not. 
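# --- Illustrative sketch, not part of bzrlib: the core of ``bzr branch A B``
# without the error handling above -- sprout the source control directory at
# the target location and open the new branch.  The name _example_branch is
# invented for this example; it assumes the target directory does not exist
# yet.
def _example_branch(from_location, to_location):
    from bzrlib import controldir, transport
    accelerator_tree, br_from = controldir.ControlDir.open_tree_or_branch(
        from_location)
    br_from.lock_read()
    try:
        revision_id = br_from.last_revision()
        to_transport = transport.get_transport(to_location)
        to_transport.mkdir('.')
        to_dir = br_from.bzrdir.sprout(to_transport.base, revision_id,
            possible_transports=[to_transport], source_branch=br_from)
        return to_dir.open_branch()
    finally:
        br_from.unlock()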
# We therefore need a try/except here and not just 'if stacked:' try: note(gettext('Created new stacked branch referring to %s.') % branch.get_stacked_on_url()) except (errors.NotStacked, errors.UnstackableBranchFormat, errors.UnstackableRepositoryFormat), e: note(ngettext('Branched %d revision.', 'Branched %d revisions.', branch.revno()) % branch.revno()) if bind: # Bind to the parent parent_branch = Branch.open(from_location) branch.bind(parent_branch) note(gettext('New branch bound to %s') % from_location) if switch: # Switch to the new branch wt, _ = WorkingTree.open_containing('.') _mod_switch.switch(wt.bzrdir, branch) note(gettext('Switched to branch: %s'), urlutils.unescape_for_display(branch.base, 'utf-8')) class cmd_branches(Command): __doc__ = """List the branches available at the current location. This command will print the names of all the branches at the current location. """ takes_args = ['location?'] takes_options = [ Option('recursive', short_name='R', help='Recursively scan for branches rather than ' 'just looking in the specified location.')] def run(self, location=".", recursive=False): if recursive: t = transport.get_transport(location) if not t.listable(): raise errors.BzrCommandError( "Can't scan this type of location.") for b in controldir.ControlDir.find_branches(t): self.outf.write("%s\n" % urlutils.unescape_for_display( urlutils.relative_url(t.base, b.base), self.outf.encoding).rstrip("/")) else: dir = controldir.ControlDir.open_containing(location)[0] try: active_branch = dir.open_branch(name="") except errors.NotBranchError: active_branch = None names = {} for name, branch in iter_sibling_branches(dir): if name == "": continue active = (active_branch is not None and active_branch.base == branch.base) names[name] = active # Only mention the current branch explicitly if it's not # one of the colocated branches if not any(names.values()) and active_branch is not None: self.outf.write("* %s\n" % gettext("(default)")) for name in sorted(names.keys()): active = names[name] if active: prefix = "*" else: prefix = " " self.outf.write("%s %s\n" % ( prefix, name.encode(self.outf.encoding))) class cmd_checkout(Command): __doc__ = """Create a new checkout of an existing branch. If BRANCH_LOCATION is omitted, checkout will reconstitute a working tree for the branch found in '.'. This is useful if you have removed the working tree or if it was never created - i.e. if you pushed the branch to its current location using SFTP. If the TO_LOCATION is omitted, the last component of the BRANCH_LOCATION will be used. In other words, "checkout ../foo/bar" will attempt to create ./bar. If the BRANCH_LOCATION has no / or path separator embedded, the TO_LOCATION is derived from the BRANCH_LOCATION by stripping a leading scheme or drive identifier, if any. For example, "checkout lp:foo-bar" will attempt to create ./foo-bar. To retrieve the branch as of a particular revision, supply the --revision parameter, as in "checkout foo/bar -r 5". Note that this will be immediately out of date [so you cannot commit] but it may be useful (i.e. to examine old code.) """ _see_also = ['checkouts', 'branch', 'working-trees', 'remove-tree'] takes_args = ['branch_location?', 'to_location?'] takes_options = ['revision', Option('lightweight', help="Perform a lightweight checkout. Lightweight " "checkouts depend on access to the branch for " "every operation. Normal checkouts can perform " "common operations like diff and status without " "such access, and also support local commits." 
), Option('files-from', type=str, help="Get file contents from this tree."), Option('hardlink', help='Hard-link working tree files where possible.' ), ] aliases = ['co'] def run(self, branch_location=None, to_location=None, revision=None, lightweight=False, files_from=None, hardlink=False): if branch_location is None: branch_location = osutils.getcwd() to_location = branch_location accelerator_tree, source = controldir.ControlDir.open_tree_or_branch( branch_location) if not (hardlink or files_from): # accelerator_tree is usually slower because you have to read N # files (no readahead, lots of seeks, etc), but allow the user to # explicitly request it accelerator_tree = None revision = _get_one_revision('checkout', revision) if files_from is not None and files_from != branch_location: accelerator_tree = WorkingTree.open(files_from) if revision is not None: revision_id = revision.as_revision_id(source) else: revision_id = None if to_location is None: to_location = urlutils.derive_to_location(branch_location) # if the source and to_location are the same, # and there is no working tree, # then reconstitute a branch if (osutils.abspath(to_location) == osutils.abspath(branch_location)): try: source.bzrdir.open_workingtree() except errors.NoWorkingTree: source.bzrdir.create_workingtree(revision_id) return source.create_checkout(to_location, revision_id, lightweight, accelerator_tree, hardlink) class cmd_renames(Command): __doc__ = """Show list of renamed files. """ # TODO: Option to show renames between two historical versions. # TODO: Only show renames under dir, rather than in the whole branch. _see_also = ['status'] takes_args = ['dir?'] @display_command def run(self, dir=u'.'): tree = WorkingTree.open_containing(dir)[0] self.add_cleanup(tree.lock_read().unlock) old_tree = tree.basis_tree() self.add_cleanup(old_tree.lock_read().unlock) renames = [] iterator = tree.iter_changes(old_tree, include_unchanged=True) for f, paths, c, v, p, n, k, e in iterator: if paths[0] == paths[1]: continue if None in (paths): continue renames.append(paths) renames.sort() for old_name, new_name in renames: self.outf.write("%s => %s\n" % (old_name, new_name)) class cmd_update(Command): __doc__ = """Update a working tree to a new revision. This will perform a merge of the destination revision (the tip of the branch, or the specified revision) into the working tree, and then make that revision the basis revision for the working tree. You can use this to visit an older revision, or to update a working tree that is out of date from its branch. If there are any uncommitted changes in the tree, they will be carried across and remain as uncommitted changes after the update. To discard these changes, use 'bzr revert'. The uncommitted changes may conflict with the changes brought in by the change in basis revision. If the tree's branch is bound to a master branch, bzr will also update the branch from the master. You cannot update just a single file or directory, because each Bazaar working tree has just a single basis revision. If you want to restore a file that has been removed locally, use 'bzr revert' instead of 'bzr update'. If you want to restore a file to its state in a previous revision, use 'bzr revert' with a '-r' option, or use 'bzr cat' to write out the old content of that file to a new location. The 'dir' argument, if given, must be the location of the root of a working tree to update. By default, the working tree that contains the current working directory is used. 
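# --- Illustrative sketch, not part of bzrlib: collecting (old, new) path
# pairs for renamed files, as cmd_renames.run() above does with
# iter_changes(include_unchanged=True).  The name _example_renames is
# invented for this example; both trees should be read-locked by the caller.
def _example_renames(tree):
    old_tree = tree.basis_tree()
    renames = []
    for change in tree.iter_changes(old_tree, include_unchanged=True):
        paths = change[1]          # (old_path, new_path)
        if paths[0] == paths[1] or None in paths:
            continue
        renames.append(paths)
    return sorted(renames)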
""" _see_also = ['pull', 'working-trees', 'status-flags'] takes_args = ['dir?'] takes_options = ['revision', Option('show-base', help="Show base revision text in conflicts."), ] aliases = ['up'] def run(self, dir=None, revision=None, show_base=None): if revision is not None and len(revision) != 1: raise errors.BzrCommandError(gettext( "bzr update --revision takes exactly one revision")) if dir is None: tree = WorkingTree.open_containing('.')[0] else: tree, relpath = WorkingTree.open_containing(dir) if relpath: # See bug 557886. raise errors.BzrCommandError(gettext( "bzr update can only update a whole tree, " "not a file or subdirectory")) branch = tree.branch possible_transports = [] master = branch.get_master_branch( possible_transports=possible_transports) if master is not None: branch_location = master.base tree.lock_write() else: branch_location = tree.branch.base tree.lock_tree_write() self.add_cleanup(tree.unlock) # get rid of the final '/' and be ready for display branch_location = urlutils.unescape_for_display( branch_location.rstrip('/'), self.outf.encoding) existing_pending_merges = tree.get_parent_ids()[1:] if master is None: old_tip = None else: # may need to fetch data into a heavyweight checkout # XXX: this may take some time, maybe we should display a # message old_tip = branch.update(possible_transports) if revision is not None: revision_id = revision[0].as_revision_id(branch) else: revision_id = branch.last_revision() if revision_id == _mod_revision.ensure_null(tree.last_revision()): revno = branch.revision_id_to_dotted_revno(revision_id) note(gettext("Tree is up to date at revision {0} of branch {1}" ).format('.'.join(map(str, revno)), branch_location)) return 0 view_info = _get_view_info_for_change_reporter(tree) change_reporter = delta._ChangeReporter( unversioned_filter=tree.is_ignored, view_info=view_info) try: conflicts = tree.update( change_reporter, possible_transports=possible_transports, revision=revision_id, old_tip=old_tip, show_base=show_base) except errors.NoSuchRevision, e: raise errors.BzrCommandError(gettext( "branch has no revision %s\n" "bzr update --revision only works" " for a revision in the branch history") % (e.revision)) revno = tree.branch.revision_id_to_dotted_revno( _mod_revision.ensure_null(tree.last_revision())) note(gettext('Updated to revision {0} of branch {1}').format( '.'.join(map(str, revno)), branch_location)) parent_ids = tree.get_parent_ids() if parent_ids[1:] and parent_ids[1:] != existing_pending_merges: note(gettext('Your local commits will now show as pending merges with ' "'bzr status', and can be committed with 'bzr commit'.")) if conflicts != 0: return 1 else: return 0 class cmd_info(Command): __doc__ = """Show information about a working tree, branch or repository. This command will show all known locations and formats associated to the tree, branch or repository. In verbose mode, statistical information is included with each report. To see extended statistic information, use a verbosity level of 2 or higher by specifying the verbose option multiple times, e.g. -vv. Branches and working trees will also report any missing revisions. 
:Examples: Display information on the format and related locations: bzr info Display the above together with extended format information and basic statistics (like the number of files in the working tree and number of revisions in the branch and repository): bzr info -v Display the above together with number of committers to the branch: bzr info -vv """ _see_also = ['revno', 'working-trees', 'repositories'] takes_args = ['location?'] takes_options = ['verbose'] encoding_type = 'replace' @display_command def run(self, location=None, verbose=False): if verbose: noise_level = get_verbosity_level() else: noise_level = 0 from bzrlib.info import show_bzrdir_info show_bzrdir_info(controldir.ControlDir.open_containing(location)[0], verbose=noise_level, outfile=self.outf) class cmd_remove(Command): __doc__ = """Remove files or directories. This makes Bazaar stop tracking changes to the specified files. Bazaar will delete them if they can easily be recovered using revert otherwise they will be backed up (adding an extension of the form .~#~). If no options or parameters are given Bazaar will scan for files that are being tracked by Bazaar but missing in your tree and stop tracking them for you. """ takes_args = ['file*'] takes_options = ['verbose', Option('new', help='Only remove files that have never been committed.'), RegistryOption.from_kwargs('file-deletion-strategy', 'The file deletion mode to be used.', title='Deletion Strategy', value_switches=True, enum_switch=False, safe='Backup changed files (default).', keep='Delete from bzr but leave the working copy.', no_backup='Don\'t backup changed files.'), ] aliases = ['rm', 'del'] encoding_type = 'replace' def run(self, file_list, verbose=False, new=False, file_deletion_strategy='safe'): tree, file_list = WorkingTree.open_containing_paths(file_list) if file_list is not None: file_list = [f for f in file_list] self.add_cleanup(tree.lock_write().unlock) # Heuristics should probably all move into tree.remove_smart or # some such? if new: added = tree.changes_from(tree.basis_tree(), specific_files=file_list).added file_list = sorted([f[0] for f in added], reverse=True) if len(file_list) == 0: raise errors.BzrCommandError(gettext('No matching files.')) elif file_list is None: # missing files show up in iter_changes(basis) as # versioned-with-no-kind. missing = [] for change in tree.iter_changes(tree.basis_tree()): # Find paths in the working tree that have no kind: if change[1][1] is not None and change[6][1] is None: missing.append(change[1][1]) file_list = sorted(missing, reverse=True) file_deletion_strategy = 'keep' tree.remove(file_list, verbose=verbose, to_file=self.outf, keep_files=file_deletion_strategy=='keep', force=(file_deletion_strategy=='no-backup')) class cmd_file_id(Command): __doc__ = """Print file_id of a particular file or directory. The file_id is assigned when the file is first added and remains the same through all revisions where the file exists, even when it is moved or renamed. """ hidden = True _see_also = ['inventory', 'ls'] takes_args = ['filename'] @display_command def run(self, filename): tree, relpath = WorkingTree.open_containing(filename) i = tree.path2id(relpath) if i is None: raise errors.NotVersionedError(filename) else: self.outf.write(i + '\n') class cmd_file_path(Command): __doc__ = """Print path of file_ids to a file or directory. This prints one line for each directory down to the target, starting at the branch root. 
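# --- Illustrative sketch, not part of bzrlib: looking up the stable file id
# for a path, as cmd_file_id.run() above does.  The name _example_file_id is
# invented for this example.
def _example_file_id(filename):
    from bzrlib import errors
    from bzrlib.workingtree import WorkingTree
    tree, relpath = WorkingTree.open_containing(filename)
    file_id = tree.path2id(relpath)
    if file_id is None:
        raise errors.NotVersionedError(filename)
    return file_id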
""" hidden = True takes_args = ['filename'] @display_command def run(self, filename): tree, relpath = WorkingTree.open_containing(filename) fid = tree.path2id(relpath) if fid is None: raise errors.NotVersionedError(filename) segments = osutils.splitpath(relpath) for pos in range(1, len(segments) + 1): path = osutils.joinpath(segments[:pos]) self.outf.write("%s\n" % tree.path2id(path)) class cmd_reconcile(Command): __doc__ = """Reconcile bzr metadata in a branch. This can correct data mismatches that may have been caused by previous ghost operations or bzr upgrades. You should only need to run this command if 'bzr check' or a bzr developer advises you to run it. If a second branch is provided, cross-branch reconciliation is also attempted, which will check that data like the tree root id which was not present in very early bzr versions is represented correctly in both branches. At the same time it is run it may recompress data resulting in a potential saving in disk space or performance gain. The branch *MUST* be on a listable system such as local disk or sftp. """ _see_also = ['check'] takes_args = ['branch?'] takes_options = [ Option('canonicalize-chks', help='Make sure CHKs are in canonical form (repairs ' 'bug 522637).', hidden=True), ] def run(self, branch=".", canonicalize_chks=False): from bzrlib.reconcile import reconcile dir = controldir.ControlDir.open(branch) reconcile(dir, canonicalize_chks=canonicalize_chks) class cmd_revision_history(Command): __doc__ = """Display the list of revision ids on a branch.""" _see_also = ['log'] takes_args = ['location?'] hidden = True @display_command def run(self, location="."): branch = Branch.open_containing(location)[0] self.add_cleanup(branch.lock_read().unlock) graph = branch.repository.get_graph() history = list(graph.iter_lefthand_ancestry(branch.last_revision(), [_mod_revision.NULL_REVISION])) for revid in reversed(history): self.outf.write(revid) self.outf.write('\n') class cmd_ancestry(Command): __doc__ = """List all revisions merged into this branch.""" _see_also = ['log', 'revision-history'] takes_args = ['location?'] hidden = True @display_command def run(self, location="."): try: wt = WorkingTree.open_containing(location)[0] except errors.NoWorkingTree: b = Branch.open(location) last_revision = b.last_revision() else: b = wt.branch last_revision = wt.last_revision() self.add_cleanup(b.repository.lock_read().unlock) graph = b.repository.get_graph() revisions = [revid for revid, parents in graph.iter_ancestry([last_revision])] for revision_id in reversed(revisions): if _mod_revision.is_null(revision_id): continue self.outf.write(revision_id + '\n') class cmd_init(Command): __doc__ = """Make a directory into a versioned branch. Use this to create an empty branch, or before importing an existing project. If there is a repository in a parent directory of the location, then the history of the branch will be stored in the repository. Otherwise init creates a standalone branch which carries its own history in the .bzr directory. If there is already a branch at the location but it has no working tree, the tree can be populated with 'bzr checkout'. Recipe for importing a tree of files:: cd ~/project bzr init bzr add . bzr status bzr commit -m "imported project" """ _see_also = ['init-repository', 'branch', 'checkout'] takes_args = ['location?'] takes_options = [ Option('create-prefix', help='Create the path leading up to the branch ' 'if it does not already exist.'), RegistryOption('format', help='Specify a format for this branch. 
' 'See "help formats".', lazy_registry=('bzrlib.controldir', 'format_registry'), converter=lambda name: controldir.format_registry.make_bzrdir(name), value_switches=True, title="Branch format", ), Option('append-revisions-only', help='Never change revnos or the existing log.' ' Append revisions to it only.'), Option('no-tree', 'Create a branch without a working tree.') ] def run(self, location=None, format=None, append_revisions_only=False, create_prefix=False, no_tree=False): if format is None: format = controldir.format_registry.make_bzrdir('default') if location is None: location = u'.' to_transport = transport.get_transport(location) # The path has to exist to initialize a # branch inside of it. # Just using os.mkdir, since I don't # believe that we want to create a bunch of # locations if the user supplies an extended path try: to_transport.ensure_base() except errors.NoSuchFile: if not create_prefix: raise errors.BzrCommandError(gettext("Parent directory of %s" " does not exist." "\nYou may supply --create-prefix to create all" " leading parent directories.") % location) to_transport.create_prefix() try: a_bzrdir = controldir.ControlDir.open_from_transport(to_transport) except errors.NotBranchError: # really a NotBzrDir error... create_branch = controldir.ControlDir.create_branch_convenience if no_tree: force_new_tree = False else: force_new_tree = None branch = create_branch(to_transport.base, format=format, possible_transports=[to_transport], force_new_tree=force_new_tree) a_bzrdir = branch.bzrdir else: from bzrlib.transport.local import LocalTransport if a_bzrdir.has_branch(): if (isinstance(to_transport, LocalTransport) and not a_bzrdir.has_workingtree()): raise errors.BranchExistsWithoutWorkingTree(location) raise errors.AlreadyBranchError(location) branch = a_bzrdir.create_branch() if not no_tree and not a_bzrdir.has_workingtree(): a_bzrdir.create_workingtree() if append_revisions_only: try: branch.set_append_revisions_only(True) except errors.UpgradeRequired: raise errors.BzrCommandError(gettext('This branch format cannot be set' ' to append-revisions-only. Try --default.')) if not is_quiet(): from bzrlib.info import describe_layout, describe_format try: tree = a_bzrdir.open_workingtree(recommend_upgrade=False) except (errors.NoWorkingTree, errors.NotLocalUrl): tree = None repository = branch.repository layout = describe_layout(repository, branch, tree).lower() format = describe_format(a_bzrdir, repository, branch, tree) self.outf.write(gettext("Created a {0} (format: {1})\n").format( layout, format)) if repository.is_shared(): #XXX: maybe this can be refactored into transport.path_or_url() url = repository.bzrdir.root_transport.external_url() try: url = urlutils.local_path_from_url(url) except errors.InvalidURL: pass self.outf.write(gettext("Using shared repository: %s\n") % url) class cmd_init_repository(Command): __doc__ = """Create a shared repository for branches to share storage space. New branches created under the repository directory will store their revisions in the repository, not in the branch directory. For branches with shared history, this reduces the amount of storage needed and speeds up the creation of new branches. If the --no-trees option is given then the branches in the repository will not have working trees by default. They will still exist as directories on disk, but they will not have separate copies of the files at a certain revision. 
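# --- Illustrative sketch, not part of bzrlib: creating a default-format
# branch (with a working tree) at LOCATION, using the same convenience entry
# point as cmd_init.run() above.  The name _example_init is invented for this
# example; the parent directory of LOCATION must already exist.
def _example_init(location):
    from bzrlib import controldir, transport
    format = controldir.format_registry.make_bzrdir('default')
    to_transport = transport.get_transport(location)
    to_transport.ensure_base()
    return controldir.ControlDir.create_branch_convenience(
        to_transport.base, format=format,
        possible_transports=[to_transport], force_new_tree=None)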
This can be useful for repositories that store branches which are interacted with through checkouts or remote branches, such as on a server. :Examples: Create a shared repository holding just branches:: bzr init-repo --no-trees repo bzr init repo/trunk Make a lightweight checkout elsewhere:: bzr checkout --lightweight repo/trunk trunk-checkout cd trunk-checkout (add files here) """ _see_also = ['init', 'branch', 'checkout', 'repositories'] takes_args = ["location"] takes_options = [RegistryOption('format', help='Specify a format for this repository. See' ' "bzr help formats" for details.', lazy_registry=('bzrlib.controldir', 'format_registry'), converter=lambda name: controldir.format_registry.make_bzrdir(name), value_switches=True, title='Repository format'), Option('no-trees', help='Branches in the repository will default to' ' not having a working tree.'), ] aliases = ["init-repo"] def run(self, location, format=None, no_trees=False): if format is None: format = controldir.format_registry.make_bzrdir('default') if location is None: location = '.' to_transport = transport.get_transport(location) (repo, newdir, require_stacking, repository_policy) = ( format.initialize_on_transport_ex(to_transport, create_prefix=True, make_working_trees=not no_trees, shared_repo=True, force_new_repo=True, use_existing_dir=True, repo_format_name=format.repository_format.get_format_string())) if not is_quiet(): from bzrlib.info import show_bzrdir_info show_bzrdir_info(newdir, verbose=0, outfile=self.outf) class cmd_diff(Command): __doc__ = """Show differences in the working tree, between revisions or branches. If no arguments are given, all changes for the current tree are listed. If files are given, only the changes in those files are listed. Remote and multiple branches can be compared by using the --old and --new options. If not provided, the default for both is derived from the first argument, if any, or the current tree if no arguments are given. "bzr diff -p1" is equivalent to "bzr diff --prefix old/:new/", and produces patches suitable for "patch -p1". Note that when using the -r argument with a range of revisions, the differences are computed between the two specified revisions. That is, the command does not show the changes introduced by the first revision in the range. This differs from the interpretation of revision ranges used by "bzr log" which includes the first revision in the range. :Exit values: 1 - changed 2 - unrepresentable changes 3 - error 0 - no change :Examples: Shows the difference in the working tree versus the last commit:: bzr diff Difference between the working tree and revision 1:: bzr diff -r1 Difference between revision 3 and revision 1:: bzr diff -r1..3 Difference between revision 3 and revision 1 for branch xxx:: bzr diff -r1..3 xxx The changes introduced by revision 2 (equivalent to -r1..2):: bzr diff -c2 To see the changes introduced by revision X:: bzr diff -cX Note that in the case of a merge, the -c option shows the changes compared to the left hand parent. To see the changes against another parent, use:: bzr diff -r..X The changes between the current revision and the previous revision (equivalent to -c-1 and -r-2..-1) bzr diff -r-2.. 
Show just the differences for file NEWS:: bzr diff NEWS Show the differences in working tree xxx for file NEWS:: bzr diff xxx/NEWS Show the differences from branch xxx to this working tree: bzr diff --old xxx Show the differences between two branches for file NEWS:: bzr diff --old xxx --new yyy NEWS Same as 'bzr diff' but prefix paths with old/ and new/:: bzr diff --prefix old/:new/ Show the differences using a custom diff program with options:: bzr diff --using /usr/bin/diff --diff-options -wu """ _see_also = ['status'] takes_args = ['file*'] takes_options = [ Option('diff-options', type=str, help='Pass these options to the external diff program.'), Option('prefix', type=str, short_name='p', help='Set prefixes added to old and new filenames, as ' 'two values separated by a colon. (eg "old/:new/").'), Option('old', help='Branch/tree to compare from.', type=unicode, ), Option('new', help='Branch/tree to compare to.', type=unicode, ), 'revision', 'change', Option('using', help='Use this command to compare files.', type=unicode, ), RegistryOption('format', short_name='F', help='Diff format to use.', lazy_registry=('bzrlib.diff', 'format_registry'), title='Diff format'), Option('context', help='How many lines of context to show.', type=int, ), ] aliases = ['di', 'dif'] encoding_type = 'exact' @display_command def run(self, revision=None, file_list=None, diff_options=None, prefix=None, old=None, new=None, using=None, format=None, context=None): from bzrlib.diff import (get_trees_and_branches_to_diff_locked, show_diff_trees) if (prefix is None) or (prefix == '0'): # diff -p0 format old_label = '' new_label = '' elif prefix == '1': old_label = 'old/' new_label = 'new/' elif ':' in prefix: old_label, new_label = prefix.split(":") else: raise errors.BzrCommandError(gettext( '--prefix expects two values separated by a colon' ' (eg "old/:new/")')) if revision and len(revision) > 2: raise errors.BzrCommandError(gettext('bzr diff --revision takes exactly' ' one or two revision specifiers')) if using is not None and format is not None: raise errors.BzrCommandError(gettext( '{0} and {1} are mutually exclusive').format( '--using', '--format')) (old_tree, new_tree, old_branch, new_branch, specific_files, extra_trees) = get_trees_and_branches_to_diff_locked( file_list, revision, old, new, self.add_cleanup, apply_view=True) # GNU diff on Windows uses ANSI encoding for filenames path_encoding = osutils.get_diff_header_encoding() return show_diff_trees(old_tree, new_tree, sys.stdout, specific_files=specific_files, external_diff_options=diff_options, old_label=old_label, new_label=new_label, extra_trees=extra_trees, path_encoding=path_encoding, using=using, context=context, format_cls=format) class cmd_deleted(Command): __doc__ = """List files deleted in the working tree. """ # TODO: Show files deleted since a previous revision, or # between two revisions. # TODO: Much more efficient way to do this: read in new # directories with readdir, rather than stating each one. Same # level of effort but possibly much less IO. (Or possibly not, # if the directories are very large...) 
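# --- Illustrative sketch, not part of bzrlib: diffing the working tree in the
# current directory against its basis tree with the same show_diff_trees()
# call used by cmd_diff.run() above.  The name _example_diff_to_basis is
# invented for this example; it returns 1 if there were differences and 0
# otherwise, matching the exit values documented above.
def _example_diff_to_basis():
    import sys
    from bzrlib.diff import show_diff_trees
    from bzrlib.workingtree import WorkingTree
    tree = WorkingTree.open_containing(u'.')[0]
    tree.lock_read()
    try:
        old_tree = tree.basis_tree()
        old_tree.lock_read()
        try:
            return show_diff_trees(old_tree, tree, sys.stdout,
                                   old_label='old/', new_label='new/')
        finally:
            old_tree.unlock()
    finally:
        tree.unlock()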
_see_also = ['status', 'ls'] takes_options = ['directory', 'show-ids'] @display_command def run(self, show_ids=False, directory=u'.'): tree = WorkingTree.open_containing(directory)[0] self.add_cleanup(tree.lock_read().unlock) old = tree.basis_tree() self.add_cleanup(old.lock_read().unlock) for path, ie in old.iter_entries_by_dir(): if not tree.has_id(ie.file_id): self.outf.write(path) if show_ids: self.outf.write(' ') self.outf.write(ie.file_id) self.outf.write('\n') class cmd_modified(Command): __doc__ = """List files modified in working tree. """ hidden = True _see_also = ['status', 'ls'] takes_options = ['directory', 'null'] @display_command def run(self, null=False, directory=u'.'): tree = WorkingTree.open_containing(directory)[0] self.add_cleanup(tree.lock_read().unlock) td = tree.changes_from(tree.basis_tree()) self.cleanup_now() for path, id, kind, text_modified, meta_modified in td.modified: if null: self.outf.write(path + '\0') else: self.outf.write(osutils.quotefn(path) + '\n') class cmd_added(Command): __doc__ = """List files added in working tree. """ hidden = True _see_also = ['status', 'ls'] takes_options = ['directory', 'null'] @display_command def run(self, null=False, directory=u'.'): wt = WorkingTree.open_containing(directory)[0] self.add_cleanup(wt.lock_read().unlock) basis = wt.basis_tree() self.add_cleanup(basis.lock_read().unlock) root_id = wt.get_root_id() for file_id in wt.all_file_ids(): if basis.has_id(file_id): continue if root_id == file_id: continue path = wt.id2path(file_id) if not os.access(osutils.pathjoin(wt.basedir, path), os.F_OK): continue if null: self.outf.write(path + '\0') else: self.outf.write(osutils.quotefn(path) + '\n') class cmd_root(Command): __doc__ = """Show the tree root directory. The root is the nearest enclosing directory with a .bzr control directory.""" takes_args = ['filename?'] @display_command def run(self, filename=None): """Print the branch root.""" tree = WorkingTree.open_containing(filename)[0] self.outf.write(tree.basedir + '\n') def _parse_limit(limitstring): try: return int(limitstring) except ValueError: msg = gettext("The limit argument must be an integer.") raise errors.BzrCommandError(msg) def _parse_levels(s): try: return int(s) except ValueError: msg = gettext("The levels argument must be an integer.") raise errors.BzrCommandError(msg) class cmd_log(Command): __doc__ = """Show historical log for a branch or subset of a branch. log is bzr's default tool for exploring the history of a branch. The branch to use is taken from the first parameter. If no parameters are given, the branch containing the working directory is logged. Here are some simple examples:: bzr log log the current branch bzr log foo.py log a file in its branch bzr log http://server/branch log a branch on a server The filtering, ordering and information shown for each revision can be controlled as explained below. By default, all revisions are shown sorted (topologically) so that newer revisions appear before older ones and descendants always appear before ancestors. If displayed, merged revisions are shown indented under the revision in which they were merged. :Output control: The log format controls how information about each revision is displayed. The standard log formats are called ``long``, ``short`` and ``line``. The default is long. See ``bzr help log-formats`` for more details on log formats. 
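# Editor's illustrative sketch, not part of bzrlib: the option-converter
# pattern used by _parse_limit/_parse_levels above, a plain callable that
# turns the command-line string into an int and raises a friendly error
# otherwise.  The helper name and message below are hypothetical stand-ins,
# not the real bzrlib ones.
def _example_int_option(argument_string, option_name):
    try:
        return int(argument_string)
    except ValueError:
        raise ValueError('The %s argument must be an integer, got %r'
                         % (option_name, argument_string))

# e.g. _example_int_option('10', 'limit') -> 10,
# while _example_int_option('ten', 'limit') raises ValueError.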
The following options can be used to control what information is displayed:: -l N display a maximum of N revisions -n N display N levels of revisions (0 for all, 1 for collapsed) -v display a status summary (delta) for each revision -p display a diff (patch) for each revision --show-ids display revision-ids (and file-ids), not just revnos Note that the default number of levels to display is a function of the log format. If the -n option is not used, the standard log formats show just the top level (mainline). Status summaries are shown using status flags like A, M, etc. To see the changes explained using words like ``added`` and ``modified`` instead, use the -vv option. :Ordering control: To display revisions from oldest to newest, use the --forward option. In most cases, using this option will have little impact on the total time taken to produce a log, though --forward does not incrementally display revisions like --reverse does when it can. :Revision filtering: The -r option can be used to specify what revision or range of revisions to filter against. The various forms are shown below:: -rX display revision X -rX.. display revision X and later -r..Y display up to and including revision Y -rX..Y display from X to Y inclusive See ``bzr help revisionspec`` for details on how to specify X and Y. Some common examples are given below:: -r-1 show just the tip -r-10.. show the last 10 mainline revisions -rsubmit:.. show what's new on this branch -rancestor:path.. show changes since the common ancestor of this branch and the one at location path -rdate:yesterday.. show changes since yesterday When logging a range of revisions using -rX..Y, log starts at revision Y and searches back in history through the primary ("left-hand") parents until it finds X. When logging just the top level (using -n1), an error is reported if X is not found along the way. If multi-level logging is used (-n0), X may be a nested merge revision and the log will be truncated accordingly. :Path filtering: If parameters are given and the first one is not a branch, the log will be filtered to show only those revisions that changed the nominated files or directories. Filenames are interpreted within their historical context. To log a deleted file, specify a revision range so that the file existed at the end or start of the range. Historical context is also important when interpreting pathnames of renamed files/directories. Consider the following example: * revision 1: add tutorial.txt * revision 2: modify tutorial.txt * revision 3: rename tutorial.txt to guide.txt; add tutorial.txt In this case: * ``bzr log guide.txt`` will log the file added in revision 1 * ``bzr log tutorial.txt`` will log the new file added in revision 3 * ``bzr log -r2 -p tutorial.txt`` will show the changes made to the original file in revision 2. * ``bzr log -r2 -p guide.txt`` will display an error message as there was no file called guide.txt in revision 2. Renames are always followed by log. By design, there is no need to explicitly ask for this (and no way to stop logging a file back until it was last renamed). :Other filtering: The --match option can be used for finding revisions that match a regular expression in a commit message, committer, author or bug. Specifying the option several times will match any of the supplied expressions. --match-author, --match-bugs, --match-committer and --match-message can be used to only match a specific field. 
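# Editor's illustrative sketch, not part of bzrlib: combining the log
# filtering options described above from a script.  Assumes a `bzr`
# executable on PATH and an existing branch in the current directory.
import subprocess

def _example_log_last_ten_with_merges():
    # Last 10 mainline revisions, all merge levels, with a status summary.
    return subprocess.check_output(
        ['bzr', 'log', '-r', '-10..', '-n0', '-v'])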
:Tips & tricks: GUI tools and IDEs are often better at exploring history than command line tools: you may prefer qlog or viz from qbzr or bzr-gtk, the bzr-explorer shell, or the Loggerhead web interface. See the Plugin Guide and . You may find it useful to add the aliases below to ``bazaar.conf``:: [ALIASES] tip = log -r-1 top = log -l10 --line show = log -v -p ``bzr tip`` will then show the latest revision while ``bzr top`` will show the last 10 mainline revisions. To see the details of a particular revision X, ``bzr show -rX``. If you are interested in looking deeper into a particular merge X, use ``bzr log -n0 -rX``. ``bzr log -v`` on a branch with lots of history is currently very slow. A fix for this issue is currently under development. With or without that fix, it is recommended that a revision range be given when using the -v option. bzr has a generic full-text matching plugin, bzr-search, that can be used to find revisions matching user names, commit messages, etc. Among other features, this plugin can find all revisions containing a list of words but not others. When exploring non-mainline history on large projects with deep history, the performance of log can be greatly improved by installing the historycache plugin. This plugin buffers historical information trading disk space for faster speed. """ takes_args = ['file*'] _see_also = ['log-formats', 'revisionspec'] takes_options = [ Option('forward', help='Show from oldest to newest.'), 'timezone', custom_help('verbose', help='Show files changed in each revision.'), 'show-ids', 'revision', Option('change', type=bzrlib.option._parse_revision_str, short_name='c', help='Show just the specified revision.' ' See also "help revisionspec".'), 'log-format', RegistryOption('authors', 'What names to list as authors - first, all or committer.', title='Authors', lazy_registry=('bzrlib.log', 'author_list_registry'), ), Option('levels', short_name='n', help='Number of levels to display - 0 for all, 1 for flat.', argname='N', type=_parse_levels), Option('message', help='Show revisions whose message matches this ' 'regular expression.', type=str, hidden=True), Option('limit', short_name='l', help='Limit the output to the first N revisions.', argname='N', type=_parse_limit), Option('show-diff', short_name='p', help='Show changes made in each revision as a patch.'), Option('include-merged', help='Show merged revisions like --levels 0 does.'), Option('include-merges', hidden=True, help='Historical alias for --include-merged.'), Option('omit-merges', help='Do not report commits with more than one parent.'), Option('exclude-common-ancestry', help='Display only the revisions that are not part' ' of both ancestries (require -rX..Y).' 
), Option('signatures', help='Show digital signature validity.'), ListOption('match', short_name='m', help='Show revisions whose properties match this ' 'expression.', type=str), ListOption('match-message', help='Show revisions whose message matches this ' 'expression.', type=str), ListOption('match-committer', help='Show revisions whose committer matches this ' 'expression.', type=str), ListOption('match-author', help='Show revisions whose authors match this ' 'expression.', type=str), ListOption('match-bugs', help='Show revisions whose bugs match this ' 'expression.', type=str) ] encoding_type = 'replace' @display_command def run(self, file_list=None, timezone='original', verbose=False, show_ids=False, forward=False, revision=None, change=None, log_format=None, levels=None, message=None, limit=None, show_diff=False, include_merged=None, authors=None, exclude_common_ancestry=False, signatures=False, match=None, match_message=None, match_committer=None, match_author=None, match_bugs=None, omit_merges=False, include_merges=symbol_versioning.DEPRECATED_PARAMETER, ): from bzrlib.log import ( Logger, make_log_request_dict, _get_info_for_log_files, ) direction = (forward and 'forward') or 'reverse' if symbol_versioning.deprecated_passed(include_merges): ui.ui_factory.show_user_warning( 'deprecated_command_option', deprecated_name='--include-merges', recommended_name='--include-merged', deprecated_in_version='2.5', command=self.invoked_as) if include_merged is None: include_merged = include_merges else: raise errors.BzrCommandError(gettext( '{0} and {1} are mutually exclusive').format( '--include-merges', '--include-merged')) if include_merged is None: include_merged = False if (exclude_common_ancestry and (revision is None or len(revision) != 2)): raise errors.BzrCommandError(gettext( '--exclude-common-ancestry requires -r with two revisions')) if include_merged: if levels is None: levels = 0 else: raise errors.BzrCommandError(gettext( '{0} and {1} are mutually exclusive').format( '--levels', '--include-merged')) if change is not None: if len(change) > 1: raise errors.RangeInChangeOption() if revision is not None: raise errors.BzrCommandError(gettext( '{0} and {1} are mutually exclusive').format( '--revision', '--change')) else: revision = change file_ids = [] filter_by_dir = False if file_list: # find the file ids to log and check for directory filtering b, file_info_list, rev1, rev2 = _get_info_for_log_files( revision, file_list, self.add_cleanup) for relpath, file_id, kind in file_info_list: if file_id is None: raise errors.BzrCommandError(gettext( "Path unknown at end or start of revision range: %s") % relpath) # If the relpath is the top of the tree, we log everything if relpath == '': file_ids = [] break else: file_ids.append(file_id) filter_by_dir = filter_by_dir or ( kind in ['directory', 'tree-reference']) else: # log everything # FIXME ? log the current subdir only RBC 20060203 if revision is not None \ and len(revision) > 0 and revision[0].get_branch(): location = revision[0].get_branch() else: location = '.' 
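# Editor's illustrative sketch, not part of bzrlib: the deprecated-alias
# handling just above (--include-merges forwarding to --include-merged),
# reduced to a standalone function.  The sentinel, names and warning text
# here are hypothetical; only the control flow mirrors the command.
_UNSET = object()

def _example_resolve_renamed_option(new_value, old_value=_UNSET,
                                    warn=lambda msg: None):
    """Fold a deprecated option into its replacement.

    Warn when the old spelling is used, refuse to accept both at once,
    and default to False when neither was given.
    """
    if old_value is not _UNSET:
        warn('--include-merges is deprecated; use --include-merged instead')
        if new_value is None:
            new_value = old_value
        else:
            raise ValueError('--include-merges and --include-merged are'
                             ' mutually exclusive')
    if new_value is None:
        new_value = False
    return new_value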
dir, relpath = controldir.ControlDir.open_containing(location) b = dir.open_branch() self.add_cleanup(b.lock_read().unlock) rev1, rev2 = _get_revision_range(revision, b, self.name()) if b.get_config_stack().get('validate_signatures_in_log'): signatures = True if signatures: if not gpg.GPGStrategy.verify_signatures_available(): raise errors.GpgmeNotInstalled(None) # Decide on the type of delta & diff filtering to use # TODO: add an --all-files option to make this configurable & consistent if not verbose: delta_type = None else: delta_type = 'full' if not show_diff: diff_type = None elif file_ids: diff_type = 'partial' else: diff_type = 'full' # Build the log formatter if log_format is None: log_format = log.log_formatter_registry.get_default(b) # Make a non-encoding output to include the diffs - bug 328007 unencoded_output = ui.ui_factory.make_output_stream(encoding_type='exact') lf = log_format(show_ids=show_ids, to_file=self.outf, to_exact_file=unencoded_output, show_timezone=timezone, delta_format=get_verbosity_level(), levels=levels, show_advice=levels is None, author_list_handler=authors) # Choose the algorithm for doing the logging. It's annoying # having multiple code paths like this but necessary until # the underlying repository format is faster at generating # deltas or can provide everything we need from the indices. # The default algorithm - match-using-deltas - works for # multiple files and directories and is faster for small # amounts of history (200 revisions say). However, it's too # slow for logging a single file in a repository with deep # history, i.e. > 10K revisions. In the spirit of "do no # evil when adding features", we continue to use the # original algorithm - per-file-graph - for the "single # file that isn't a directory without showing a delta" case. partial_history = revision and b.repository._format.supports_chks match_using_deltas = (len(file_ids) != 1 or filter_by_dir or delta_type or partial_history) match_dict = {} if match: match_dict[''] = match if match_message: match_dict['message'] = match_message if match_committer: match_dict['committer'] = match_committer if match_author: match_dict['author'] = match_author if match_bugs: match_dict['bugs'] = match_bugs # Build the LogRequest and execute it if len(file_ids) == 0: file_ids = None rqst = make_log_request_dict( direction=direction, specific_fileids=file_ids, start_revision=rev1, end_revision=rev2, limit=limit, message_search=message, delta_type=delta_type, diff_type=diff_type, _match_using_deltas=match_using_deltas, exclude_common_ancestry=exclude_common_ancestry, match=match_dict, signature=signatures, omit_merges=omit_merges, ) Logger(b, rqst).show(lf) def _get_revision_range(revisionspec_list, branch, command_name): """Take the input of a revision option and turn it into a revision range. It returns RevisionInfo objects which can be used to obtain the rev_id's of the desired revisions. It does some user input validations. """ if revisionspec_list is None: rev1 = None rev2 = None elif len(revisionspec_list) == 1: rev1 = rev2 = revisionspec_list[0].in_history(branch) elif len(revisionspec_list) == 2: start_spec = revisionspec_list[0] end_spec = revisionspec_list[1] if end_spec.get_branch() != start_spec.get_branch(): # b is taken from revision[0].get_branch(), and # show_log will use its revision_history. Having # different branches will lead to weird behaviors. 
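# Editor's illustrative sketch, not part of bzrlib: how the --match* options
# above fold into a single dictionary keyed by the revision property to
# search, with '' meaning "any of message, committer, author or bug".  The
# helper name is hypothetical; the keys mirror the ones used in cmd_log.run().
def _example_build_match_dict(match=None, message=None, committer=None,
                              author=None, bugs=None):
    match_dict = {}
    for key, patterns in [('', match), ('message', message),
                          ('committer', committer), ('author', author),
                          ('bugs', bugs)]:
        if patterns:
            match_dict[key] = patterns
    return match_dict

# e.g. _example_build_match_dict(author=['jrandom'], bugs=['12345'])
#      -> {'author': ['jrandom'], 'bugs': ['12345']}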
raise errors.BzrCommandError(gettext( "bzr %s doesn't accept two revisions in different" " branches.") % command_name) if start_spec.spec is None: # Avoid loading all the history. rev1 = RevisionInfo(branch, None, None) else: rev1 = start_spec.in_history(branch) # Avoid loading all of history when we know a missing # end of range means the last revision ... if end_spec.spec is None: last_revno, last_revision_id = branch.last_revision_info() rev2 = RevisionInfo(branch, last_revno, last_revision_id) else: rev2 = end_spec.in_history(branch) else: raise errors.BzrCommandError(gettext( 'bzr %s --revision takes one or two values.') % command_name) return rev1, rev2 def _revision_range_to_revid_range(revision_range): rev_id1 = None rev_id2 = None if revision_range[0] is not None: rev_id1 = revision_range[0].rev_id if revision_range[1] is not None: rev_id2 = revision_range[1].rev_id return rev_id1, rev_id2 def get_log_format(long=False, short=False, line=False, default='long'): log_format = default if long: log_format = 'long' if short: log_format = 'short' if line: log_format = 'line' return log_format class cmd_touching_revisions(Command): __doc__ = """Return revision-ids which affected a particular file. A more user-friendly interface is "bzr log FILE". """ hidden = True takes_args = ["filename"] @display_command def run(self, filename): tree, relpath = WorkingTree.open_containing(filename) file_id = tree.path2id(relpath) b = tree.branch self.add_cleanup(b.lock_read().unlock) touching_revs = log.find_touching_revisions(b, file_id) for revno, revision_id, what in touching_revs: self.outf.write("%6d %s\n" % (revno, what)) class cmd_ls(Command): __doc__ = """List files in a tree. """ _see_also = ['status', 'cat'] takes_args = ['path?'] takes_options = [ 'verbose', 'revision', Option('recursive', short_name='R', help='Recurse into subdirectories.'), Option('from-root', help='Print paths relative to the root of the branch.'), Option('unknown', short_name='u', help='Print unknown files.'), Option('versioned', help='Print versioned files.', short_name='V'), Option('ignored', short_name='i', help='Print ignored files.'), Option('kind', short_name='k', help='List entries of a particular kind: file, directory, symlink.', type=unicode), 'null', 'show-ids', 'directory', ] @display_command def run(self, revision=None, verbose=False, recursive=False, from_root=False, unknown=False, versioned=False, ignored=False, null=False, kind=None, show_ids=False, path=None, directory=None): if kind and kind not in ('file', 'directory', 'symlink'): raise errors.BzrCommandError(gettext('invalid kind specified')) if verbose and null: raise errors.BzrCommandError(gettext('Cannot set both --verbose and --null')) all = not (unknown or versioned or ignored) selection = {'I':ignored, '?':unknown, 'V':versioned} if path is None: fs_path = '.' else: if from_root: raise errors.BzrCommandError(gettext('cannot specify both --from-root' ' and PATH')) fs_path = path tree, branch, relpath = \ _open_directory_or_containing_tree_or_branch(fs_path, directory) # Calculate the prefix to use prefix = None if from_root: if relpath: prefix = relpath + '/' elif fs_path != '.' 
and not fs_path.endswith('/'): prefix = fs_path + '/' if revision is not None or tree is None: tree = _get_one_revision_tree('ls', revision, branch=branch) apply_view = False if isinstance(tree, WorkingTree) and tree.supports_views(): view_files = tree.views.lookup_view() if view_files: apply_view = True view_str = views.view_display_str(view_files) note(gettext("Ignoring files outside view. View is %s") % view_str) self.add_cleanup(tree.lock_read().unlock) for fp, fc, fkind, fid, entry in tree.list_files(include_root=False, from_dir=relpath, recursive=recursive): # Apply additional masking if not all and not selection[fc]: continue if kind is not None and fkind != kind: continue if apply_view: try: if relpath: fullpath = osutils.pathjoin(relpath, fp) else: fullpath = fp views.check_path_in_view(tree, fullpath) except errors.FileOutsideView: continue # Output the entry if prefix: fp = osutils.pathjoin(prefix, fp) kindch = entry.kind_character() outstring = fp + kindch ui.ui_factory.clear_term() if verbose: outstring = '%-8s %s' % (fc, outstring) if show_ids and fid is not None: outstring = "%-50s %s" % (outstring, fid) self.outf.write(outstring + '\n') elif null: self.outf.write(fp + '\0') if show_ids: if fid is not None: self.outf.write(fid) self.outf.write('\0') self.outf.flush() else: if show_ids: if fid is not None: my_id = fid else: my_id = '' self.outf.write('%-50s %s\n' % (outstring, my_id)) else: self.outf.write(outstring + '\n') class cmd_unknowns(Command): __doc__ = """List unknown files. """ hidden = True _see_also = ['ls'] takes_options = ['directory'] @display_command def run(self, directory=u'.'): for f in WorkingTree.open_containing(directory)[0].unknowns(): self.outf.write(osutils.quotefn(f) + '\n') class cmd_ignore(Command): __doc__ = """Ignore specified files or patterns. See ``bzr help patterns`` for details on the syntax of patterns. If a .bzrignore file does not exist, the ignore command will create one and add the specified files or patterns to the newly created file. The ignore command will also automatically add the .bzrignore file to be versioned. Creating a .bzrignore file without the use of the ignore command will require an explicit add command. To remove patterns from the ignore list, edit the .bzrignore file. After adding, editing or deleting that file either indirectly by using this command or directly by using an editor, be sure to commit it. Bazaar also supports a global ignore file ~/.bazaar/ignore. On Windows the global ignore file can be found in the application data directory as C:\\Documents and Settings\\\\Application Data\\Bazaar\\2.0\\ignore. Global ignores are not touched by this command. The global ignore file can be edited directly using an editor. Patterns prefixed with '!' are exceptions to ignore patterns and take precedence over regular ignores. Such exceptions are used to specify files that should be versioned which would otherwise be ignored. Patterns prefixed with '!!' act as regular ignore patterns, but have precedence over the '!' exception patterns. :Notes: * Ignore patterns containing shell wildcards must be quoted from the shell on Unix. * Ignore patterns starting with "#" act as comments in the ignore file. To ignore patterns that begin with that character, use the "RE:" prefix. 
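# Editor's illustrative sketch, not part of bzrlib: the ignore workflow
# described above, driven through the command-line client.  Assumes a `bzr`
# executable on PATH and a working tree in the current directory; the
# patterns are the ones used in the examples that follow.
import subprocess

def _example_ignore_class_files():
    # Ignore compiled classes everywhere, but keep versioning special.class.
    subprocess.check_call(['bzr', 'ignore', '*.class'])
    subprocess.check_call(['bzr', 'ignore', '!special.class'])
    # .bzrignore is versioned automatically; remember to commit it.
    subprocess.check_call(['bzr', 'commit', '-m', 'Update ignore patterns',
                           '.bzrignore'])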
:Examples: Ignore the top level Makefile:: bzr ignore ./Makefile Ignore .class files in all directories...:: bzr ignore "*.class" ...but do not ignore "special.class":: bzr ignore "!special.class" Ignore files whose name begins with the "#" character:: bzr ignore "RE:^#" Ignore .o files under the lib directory:: bzr ignore "lib/**/*.o" Ignore .o files under the lib directory:: bzr ignore "RE:lib/.*\.o" Ignore everything but the "debian" toplevel directory:: bzr ignore "RE:(?!debian/).*" Ignore everything except the "local" toplevel directory, but always ignore autosave files ending in ~, even under local/:: bzr ignore "*" bzr ignore "!./local" bzr ignore "!!*~" """ _see_also = ['status', 'ignored', 'patterns'] takes_args = ['name_pattern*'] takes_options = ['directory', Option('default-rules', help='Display the default ignore rules that bzr uses.') ] def run(self, name_pattern_list=None, default_rules=None, directory=u'.'): from bzrlib import ignores if default_rules is not None: # dump the default rules and exit for pattern in ignores.USER_DEFAULTS: self.outf.write("%s\n" % pattern) return if not name_pattern_list: raise errors.BzrCommandError(gettext("ignore requires at least one " "NAME_PATTERN or --default-rules.")) name_pattern_list = [globbing.normalize_pattern(p) for p in name_pattern_list] bad_patterns = '' bad_patterns_count = 0 for p in name_pattern_list: if not globbing.Globster.is_pattern_valid(p): bad_patterns_count += 1 bad_patterns += ('\n %s' % p) if bad_patterns: msg = (ngettext('Invalid ignore pattern found. %s', 'Invalid ignore patterns found. %s', bad_patterns_count) % bad_patterns) ui.ui_factory.show_error(msg) raise errors.InvalidPattern('') for name_pattern in name_pattern_list: if (name_pattern[0] == '/' or (len(name_pattern) > 1 and name_pattern[1] == ':')): raise errors.BzrCommandError(gettext( "NAME_PATTERN should not be an absolute path")) tree, relpath = WorkingTree.open_containing(directory) ignores.tree_ignores_add_patterns(tree, name_pattern_list) ignored = globbing.Globster(name_pattern_list) matches = [] self.add_cleanup(tree.lock_read().unlock) for entry in tree.list_files(): id = entry[3] if id is not None: filename = entry[0] if ignored.match(filename): matches.append(filename) if len(matches) > 0: self.outf.write(gettext("Warning: the following files are version " "controlled and match your ignore pattern:\n%s" "\nThese files will continue to be version controlled" " unless you 'bzr remove' them.\n") % ("\n".join(matches),)) class cmd_ignored(Command): __doc__ = """List ignored files and the patterns that matched them. List all the ignored files and the ignore pattern that caused the file to be ignored. 
Alternatively, to list just the files:: bzr ls --ignored """ encoding_type = 'replace' _see_also = ['ignore', 'ls'] takes_options = ['directory'] @display_command def run(self, directory=u'.'): tree = WorkingTree.open_containing(directory)[0] self.add_cleanup(tree.lock_read().unlock) for path, file_class, kind, file_id, entry in tree.list_files(): if file_class != 'I': continue ## XXX: Slightly inefficient since this was already calculated pat = tree.is_ignored(path) self.outf.write('%-50s %s\n' % (path, pat)) class cmd_lookup_revision(Command): __doc__ = """Lookup the revision-id from a revision-number :Examples: bzr lookup-revision 33 """ hidden = True takes_args = ['revno'] takes_options = ['directory'] @display_command def run(self, revno, directory=u'.'): try: revno = int(revno) except ValueError: raise errors.BzrCommandError(gettext("not a valid revision-number: %r") % revno) revid = WorkingTree.open_containing(directory)[0].branch.get_rev_id(revno) self.outf.write("%s\n" % revid) class cmd_export(Command): __doc__ = """Export current or past revision to a destination directory or archive. If no revision is specified this exports the last committed revision. Format may be an "exporter" name, such as tar, tgz, tbz2. If none is given, try to find the format with the extension. If no extension is found exports to a directory (equivalent to --format=dir). If root is supplied, it will be used as the root directory inside container formats (tar, zip, etc). If it is not supplied it will default to the exported filename. The root option has no effect for 'dir' format. If branch is omitted then the branch containing the current working directory will be used. Note: Export of tree with non-ASCII filenames to zip is not supported. ================= ========================= Supported formats Autodetected by extension ================= ========================= dir (none) tar .tar tbz2 .tar.bz2, .tbz2 tgz .tar.gz, .tgz zip .zip ================= ========================= """ encoding = 'exact' takes_args = ['dest', 'branch_or_subdir?'] takes_options = ['directory', Option('format', help="Type of file to export to.", type=unicode), 'revision', Option('filters', help='Apply content filters to export the ' 'convenient form.'), Option('root', type=str, help="Name of the root directory inside the exported file."), Option('per-file-timestamps', help='Set modification time of files to that of the last ' 'revision in which it was changed.'), Option('uncommitted', help='Export the working tree contents rather than that of the ' 'last revision.'), ] def run(self, dest, branch_or_subdir=None, revision=None, format=None, root=None, filters=False, per_file_timestamps=False, uncommitted=False, directory=u'.'): from bzrlib.export import export if branch_or_subdir is None: branch_or_subdir = directory (tree, b, subdir) = controldir.ControlDir.open_containing_tree_or_branch( branch_or_subdir) if tree is not None: self.add_cleanup(tree.lock_read().unlock) if uncommitted: if tree is None: raise errors.BzrCommandError( gettext("--uncommitted requires a working tree")) export_tree = tree else: export_tree = _get_one_revision_tree('export', revision, branch=b, tree=tree) try: export(export_tree, dest, format, root, subdir, filtered=filters, per_file_timestamps=per_file_timestamps) except errors.NoSuchExportFormat, e: raise errors.BzrCommandError( gettext('Unsupported export format: %s') % e.format) class cmd_cat(Command): __doc__ = """Write the contents of a file as of a given revision to standard output. 
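# Editor's illustrative sketch, not part of bzrlib: exporting a branch using
# the extension-based format autodetection described in cmd_export above.
# Assumes a `bzr` executable on PATH; the destination and ../trunk location
# are hypothetical.
import subprocess

def _example_export(dest='../releases/project-1.0.tar.gz', branch='../trunk'):
    # The .tar.gz extension selects the tgz exporter; --root controls the
    # top-level directory name inside the archive.
    subprocess.check_call(['bzr', 'export', '--root', 'project-1.0',
                           dest, branch])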
If no revision is nominated, the last revision is used. Note: Take care to redirect standard output when using this command on a binary file. """ _see_also = ['ls'] takes_options = ['directory', Option('name-from-revision', help='The path name in the old tree.'), Option('filters', help='Apply content filters to display the ' 'convenience form.'), 'revision', ] takes_args = ['filename'] encoding_type = 'exact' @display_command def run(self, filename, revision=None, name_from_revision=False, filters=False, directory=None): if revision is not None and len(revision) != 1: raise errors.BzrCommandError(gettext("bzr cat --revision takes exactly" " one revision specifier")) tree, branch, relpath = \ _open_directory_or_containing_tree_or_branch(filename, directory) self.add_cleanup(branch.lock_read().unlock) return self._run(tree, branch, relpath, filename, revision, name_from_revision, filters) def _run(self, tree, b, relpath, filename, revision, name_from_revision, filtered): if tree is None: tree = b.basis_tree() rev_tree = _get_one_revision_tree('cat', revision, branch=b) self.add_cleanup(rev_tree.lock_read().unlock) old_file_id = rev_tree.path2id(relpath) # TODO: Split out this code to something that generically finds the # best id for a path across one or more trees; it's like # find_ids_across_trees but restricted to find just one. -- mbp # 20110705. if name_from_revision: # Try in revision if requested if old_file_id is None: raise errors.BzrCommandError(gettext( "{0!r} is not present in revision {1}").format( filename, rev_tree.get_revision_id())) else: actual_file_id = old_file_id else: cur_file_id = tree.path2id(relpath) if cur_file_id is not None and rev_tree.has_id(cur_file_id): actual_file_id = cur_file_id elif old_file_id is not None: actual_file_id = old_file_id else: raise errors.BzrCommandError(gettext( "{0!r} is not present in revision {1}").format( filename, rev_tree.get_revision_id())) if filtered: from bzrlib.filter_tree import ContentFilterTree filter_tree = ContentFilterTree(rev_tree, rev_tree._content_filter_stack) content = filter_tree.get_file_text(actual_file_id) else: content = rev_tree.get_file_text(actual_file_id) self.cleanup_now() self.outf.write(content) class cmd_local_time_offset(Command): __doc__ = """Show the offset in seconds from GMT to local time.""" hidden = True @display_command def run(self): self.outf.write("%s\n" % osutils.local_time_offset()) class cmd_commit(Command): __doc__ = """Commit changes into a new revision. An explanatory message needs to be given for each commit. This is often done by using the --message option (getting the message from the command line) or by using the --file option (getting the message from a file). If neither of these options is given, an editor is opened for the user to enter the message. To see the changed files in the boilerplate text loaded into the editor, use the --show-diff option. By default, the entire tree is committed and the person doing the commit is assumed to be the author. These defaults can be overridden as explained below. :Selective commits: If selected files are specified, only changes to those files are committed. If a directory is specified then the directory and everything within it is committed. When excludes are given, they take precedence over selected files. For example, to commit only changes within foo, but not changes within foo/bar:: bzr commit foo -x foo/bar A selective commit after a merge is not yet supported. 
:Custom authors: If the author of the change is not the same person as the committer, you can specify the author's name using the --author option. The name should be in the same format as a committer-id, e.g. "John Doe ". If there is more than one author of the change you can specify the option multiple times, once for each author. :Checks: A common mistake is to forget to add a new file or directory before running the commit command. The --strict option checks for unknown files and aborts the commit if any are found. More advanced pre-commit checks can be implemented by defining hooks. See ``bzr help hooks`` for details. :Things to note: If you accidentially commit the wrong changes or make a spelling mistake in the commit message say, you can use the uncommit command to undo it. See ``bzr help uncommit`` for details. Hooks can also be configured to run after a commit. This allows you to trigger updates to external systems like bug trackers. The --fixes option can be used to record the association between a revision and one or more bugs. See ``bzr help bugs`` for details. """ _see_also = ['add', 'bugs', 'hooks', 'uncommit'] takes_args = ['selected*'] takes_options = [ ListOption('exclude', type=str, short_name='x', help="Do not consider changes made to a given path."), Option('message', type=unicode, short_name='m', help="Description of the new revision."), 'verbose', Option('unchanged', help='Commit even if nothing has changed.'), Option('file', type=str, short_name='F', argname='msgfile', help='Take commit message from this file.'), Option('strict', help="Refuse to commit if there are unknown " "files in the working tree."), Option('commit-time', type=str, help="Manually set a commit time using commit date " "format, e.g. '2009-10-10 08:00:00 +0100'."), ListOption('fixes', type=str, help="Mark a bug as being fixed by this revision " "(see \"bzr help bugs\")."), ListOption('author', type=unicode, help="Set the author's name, if it's different " "from the committer."), Option('local', help="Perform a local commit in a bound " "branch. Local commits are not pushed to " "the master branch until a normal commit " "is performed." ), Option('show-diff', short_name='p', help='When no message is supplied, show the diff along' ' with the status summary in the message editor.'), Option('lossy', help='When committing to a foreign version control ' 'system do not push data that can not be natively ' 'represented.'), ] aliases = ['ci', 'checkin'] def _iter_bug_fix_urls(self, fixes, branch): default_bugtracker = None # Configure the properties for bug fixing attributes. for fixed_bug in fixes: tokens = fixed_bug.split(':') if len(tokens) == 1: if default_bugtracker is None: branch_config = branch.get_config_stack() default_bugtracker = branch_config.get( "bugtracker") if default_bugtracker is None: raise errors.BzrCommandError(gettext( "No tracker specified for bug %s. Use the form " "'tracker:id' or specify a default bug tracker " "using the `bugtracker` option.\nSee " "\"bzr help bugs\" for more information on this " "feature. Commit refused.") % fixed_bug) tag = default_bugtracker bug_id = tokens[0] elif len(tokens) != 2: raise errors.BzrCommandError(gettext( "Invalid bug %s. Must be in the form of 'tracker:id'. " "See \"bzr help bugs\" for more information on this " "feature.\nCommit refused.") % fixed_bug) else: tag, bug_id = tokens try: yield bugtracker.get_bug_url(tag, branch, bug_id) except errors.UnknownBugTrackerAbbreviation: raise errors.BzrCommandError(gettext( 'Unrecognized bug %s. 
Commit refused.') % fixed_bug) except errors.MalformedBugIdentifier, e: raise errors.BzrCommandError(gettext( "%s\nCommit refused.") % (str(e),)) def run(self, message=None, file=None, verbose=False, selected_list=None, unchanged=False, strict=False, local=False, fixes=None, author=None, show_diff=False, exclude=None, commit_time=None, lossy=False): from bzrlib.errors import ( PointlessCommit, ConflictsInTree, StrictCommitFailed ) from bzrlib.msgeditor import ( edit_commit_message_encoded, generate_commit_message_template, make_commit_message_template_encoded, set_commit_message, ) commit_stamp = offset = None if commit_time is not None: try: commit_stamp, offset = timestamp.parse_patch_date(commit_time) except ValueError, e: raise errors.BzrCommandError(gettext( "Could not parse --commit-time: " + str(e))) properties = {} tree, selected_list = WorkingTree.open_containing_paths(selected_list) if selected_list == ['']: # workaround - commit of root of tree should be exactly the same # as just default commit in that tree, and succeed even though # selected-file merge commit is not done yet selected_list = [] if fixes is None: fixes = [] bug_property = bugtracker.encode_fixes_bug_urls( self._iter_bug_fix_urls(fixes, tree.branch)) if bug_property: properties['bugs'] = bug_property if local and not tree.branch.get_bound_location(): raise errors.LocalRequiresBoundBranch() if message is not None: try: file_exists = osutils.lexists(message) except UnicodeError: # The commit message contains unicode characters that can't be # represented in the filesystem encoding, so that can't be a # file. file_exists = False if file_exists: warning_msg = ( 'The commit message is a file name: "%(f)s".\n' '(use --file "%(f)s" to take commit message from that file)' % { 'f': message }) ui.ui_factory.show_warning(warning_msg) if '\r' in message: message = message.replace('\r\n', '\n') message = message.replace('\r', '\n') if file: raise errors.BzrCommandError(gettext( "please specify either --message or --file")) def get_message(commit_obj): """Callback to get commit message""" if file: f = open(file) try: my_message = f.read().decode(osutils.get_user_encoding()) finally: f.close() elif message is not None: my_message = message else: # No message supplied: make one up. # text is the status of the tree text = make_commit_message_template_encoded(tree, selected_list, diff=show_diff, output_encoding=osutils.get_user_encoding()) # start_message is the template generated from hooks # XXX: Warning - looks like hooks return unicode, # make_commit_message_template_encoded returns user encoding. # We probably want to be using edit_commit_message instead to # avoid this. my_message = set_commit_message(commit_obj) if my_message is None: start_message = generate_commit_message_template(commit_obj) my_message = edit_commit_message_encoded(text, start_message=start_message) if my_message is None: raise errors.BzrCommandError(gettext("please specify a commit" " message with either --message or --file")) if my_message == "": raise errors.BzrCommandError(gettext("Empty commit message specified." " Please specify a commit message with either" " --message or --file or leave a blank message" " with --message \"\".")) return my_message # The API permits a commit with a filter of [] to mean 'select nothing' # but the command line should not do that. 
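# Editor's illustrative sketch, not part of bzrlib: the 'tracker:id' parsing
# performed by _iter_bug_fix_urls() above, reduced to a standalone function.
# The function name and error strings are hypothetical; the splitting rules
# mirror the command (a bare id falls back to a configured default tracker).
def _example_split_fixes_token(fixed_bug, default_tracker=None):
    """Return (tracker, bug_id) for one --fixes argument."""
    tokens = fixed_bug.split(':')
    if len(tokens) == 1:
        if default_tracker is None:
            raise ValueError('No tracker specified for bug %s; use the form '
                             "'tracker:id' or configure a default bug tracker"
                             % fixed_bug)
        return default_tracker, tokens[0]
    if len(tokens) != 2:
        raise ValueError("Invalid bug %s: must be in the form 'tracker:id'"
                         % fixed_bug)
    return tokens[0], tokens[1]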
if not selected_list: selected_list = None try: tree.commit(message_callback=get_message, specific_files=selected_list, allow_pointless=unchanged, strict=strict, local=local, reporter=None, verbose=verbose, revprops=properties, authors=author, timestamp=commit_stamp, timezone=offset, exclude=tree.safe_relpath_files(exclude), lossy=lossy) except PointlessCommit: raise errors.BzrCommandError(gettext("No changes to commit." " Please 'bzr add' the files you want to commit, or use" " --unchanged to force an empty commit.")) except ConflictsInTree: raise errors.BzrCommandError(gettext('Conflicts detected in working ' 'tree. Use "bzr conflicts" to list, "bzr resolve FILE" to' ' resolve.')) except StrictCommitFailed: raise errors.BzrCommandError(gettext("Commit refused because there are" " unknown files in the working tree.")) except errors.BoundBranchOutOfDate, e: e.extra_help = (gettext("\n" 'To commit to master branch, run update and then commit.\n' 'You can also pass --local to commit to continue working ' 'disconnected.')) raise class cmd_check(Command): __doc__ = """Validate working tree structure, branch consistency and repository history. This command checks various invariants about branch and repository storage to detect data corruption or bzr bugs. The working tree and branch checks will only give output if a problem is detected. The output fields of the repository check are: revisions This is just the number of revisions checked. It doesn't indicate a problem. versionedfiles This is just the number of versionedfiles checked. It doesn't indicate a problem. unreferenced ancestors Texts that are ancestors of other texts, but are not properly referenced by the revision ancestry. This is a subtle problem that Bazaar can work around. unique file texts This is the total number of unique file contents seen in the checked revisions. It does not indicate a problem. repeated file texts This is the total number of repeated texts seen in the checked revisions. Texts can be repeated when their file entries are modified, but the file contents are not. It does not indicate a problem. If no restrictions are specified, all Bazaar data that is found at the given location will be checked. :Examples: Check the tree and branch at 'foo':: bzr check --tree --branch foo Check only the repository at 'bar':: bzr check --repo bar Check everything at 'baz':: bzr check baz """ _see_also = ['reconcile'] takes_args = ['path?'] takes_options = ['verbose', Option('branch', help="Check the branch related to the" " current directory."), Option('repo', help="Check the repository related to the" " current directory."), Option('tree', help="Check the working tree related to" " the current directory.")] def run(self, path=None, verbose=False, branch=False, repo=False, tree=False): from bzrlib.check import check_dwim if path is None: path = '.' if not branch and not repo and not tree: branch = repo = tree = True check_dwim(path, verbose, do_branch=branch, do_repo=repo, do_tree=tree) class cmd_upgrade(Command): __doc__ = """Upgrade a repository, branch or working tree to a newer format. When the default format has changed after a major new release of Bazaar, you may be informed during certain operations that you should upgrade. Upgrading to a newer format may improve performance or make new features available. It may however limit interoperability with older repositories or with older versions of Bazaar. 
If you wish to upgrade to a particular format rather than the current default, that can be specified using the --format option. As a consequence, you can use the upgrade command this way to "downgrade" to an earlier format, though some conversions are a one way process (e.g. changing from the 1.x default to the 2.x default) so downgrading is not always possible. A backup.bzr.~#~ directory is created at the start of the conversion process (where # is a number). By default, this is left there on completion. If the conversion fails, delete the new .bzr directory and rename this one back in its place. Use the --clean option to ask for the backup.bzr directory to be removed on successful conversion. Alternatively, you can delete it by hand if everything looks good afterwards. If the location given is a shared repository, dependent branches are also converted provided the repository converts successfully. If the conversion of a branch fails, remaining branches are still tried. For more information on upgrades, see the Bazaar Upgrade Guide, http://doc.bazaar.canonical.com/latest/en/upgrade-guide/. """ _see_also = ['check', 'reconcile', 'formats'] takes_args = ['url?'] takes_options = [ RegistryOption('format', help='Upgrade to a specific format. See "bzr help' ' formats" for details.', lazy_registry=('bzrlib.controldir', 'format_registry'), converter=lambda name: controldir.format_registry.make_bzrdir(name), value_switches=True, title='Branch format'), Option('clean', help='Remove the backup.bzr directory if successful.'), Option('dry-run', help="Show what would be done, but don't actually do anything."), ] def run(self, url='.', format=None, clean=False, dry_run=False): from bzrlib.upgrade import upgrade exceptions = upgrade(url, format, clean_up=clean, dry_run=dry_run) if exceptions: if len(exceptions) == 1: # Compatibility with historical behavior raise exceptions[0] else: return 3 class cmd_whoami(Command): __doc__ = """Show or set bzr user id. :Examples: Show the email of the current user:: bzr whoami --email Set the current user:: bzr whoami "Frank Chu " """ takes_options = [ 'directory', Option('email', help='Display email address only.'), Option('branch', help='Set identity for the current branch instead of ' 'globally.'), ] takes_args = ['name?'] encoding_type = 'replace' @display_command def run(self, email=False, branch=False, name=None, directory=None): if name is None: if directory is None: # use branch if we're inside one; otherwise global config try: c = Branch.open_containing(u'.')[0].get_config_stack() except errors.NotBranchError: c = _mod_config.GlobalStack() else: c = Branch.open(directory).get_config_stack() identity = c.get('email') if email: self.outf.write(_mod_config.extract_email_address(identity) + '\n') else: self.outf.write(identity + '\n') return if email: raise errors.BzrCommandError(gettext("--email can only be used to display existing " "identity")) # display a warning if an email address isn't included in the given name. try: _mod_config.extract_email_address(name) except errors.NoEmailInUsername, e: warning('"%s" does not seem to contain an email address. ' 'This is allowed, but not recommended.', name) # use global config unless --branch given if branch: if directory is None: c = Branch.open_containing(u'.')[0].get_config_stack() else: b = Branch.open(directory) self.add_cleanup(b.lock_write().unlock) c = b.get_config_stack() else: c = _mod_config.GlobalStack() c.set('email', name) class cmd_nick(Command): __doc__ = """Print or set the branch nickname. 
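# Editor's illustrative sketch, not part of bzrlib: setting and querying the
# user identity with the whoami command described above.  Assumes a `bzr`
# executable on PATH; the name comes from the docstring example and the
# example.com address is a hypothetical placeholder.
import subprocess

def _example_set_identity():
    # Set the global identity, then read back just the email address.
    subprocess.check_call(['bzr', 'whoami', 'Frank Chu <fchu@example.com>'])
    return subprocess.check_output(['bzr', 'whoami', '--email'])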
If unset, the colocated branch name is used for colocated branches, and the branch directory name is used for other branches. To print the current nickname, execute with no argument. Bound branches use the nickname of its master branch unless it is set locally. """ _see_also = ['info'] takes_args = ['nickname?'] takes_options = ['directory'] def run(self, nickname=None, directory=u'.'): branch = Branch.open_containing(directory)[0] if nickname is None: self.printme(branch) else: branch.nick = nickname @display_command def printme(self, branch): self.outf.write('%s\n' % branch.nick) class cmd_alias(Command): __doc__ = """Set/unset and display aliases. :Examples: Show the current aliases:: bzr alias Show the alias specified for 'll':: bzr alias ll Set an alias for 'll':: bzr alias ll="log --line -r-10..-1" To remove an alias for 'll':: bzr alias --remove ll """ takes_args = ['name?'] takes_options = [ Option('remove', help='Remove the alias.'), ] def run(self, name=None, remove=False): if remove: self.remove_alias(name) elif name is None: self.print_aliases() else: equal_pos = name.find('=') if equal_pos == -1: self.print_alias(name) else: self.set_alias(name[:equal_pos], name[equal_pos+1:]) def remove_alias(self, alias_name): if alias_name is None: raise errors.BzrCommandError(gettext( 'bzr alias --remove expects an alias to remove.')) # If alias is not found, print something like: # unalias: foo: not found c = _mod_config.GlobalConfig() c.unset_alias(alias_name) @display_command def print_aliases(self): """Print out the defined aliases in a similar format to bash.""" aliases = _mod_config.GlobalConfig().get_aliases() for key, value in sorted(aliases.iteritems()): self.outf.write('bzr alias %s="%s"\n' % (key, value)) @display_command def print_alias(self, alias_name): from bzrlib.commands import get_alias alias = get_alias(alias_name) if alias is None: self.outf.write("bzr alias: %s: not found\n" % alias_name) else: self.outf.write( 'bzr alias %s="%s"\n' % (alias_name, ' '.join(alias))) def set_alias(self, alias_name, alias_command): """Save the alias in the global config.""" c = _mod_config.GlobalConfig() c.set_alias(alias_name, alias_command) class cmd_selftest(Command): __doc__ = """Run internal test suite. If arguments are given, they are regular expressions that say which tests should run. Tests matching any expression are run, and other tests are not run. Alternatively if --first is given, matching tests are run first and then all other tests are run. This is useful if you have been working in a particular area, but want to make sure nothing else was broken. If --exclude is given, tests that match that regular expression are excluded, regardless of whether they match --first or not. To help catch accidential dependencies between tests, the --randomize option is useful. In most cases, the argument used is the word 'now'. Note that the seed used for the random number generator is displayed when this option is used. The seed can be explicitly passed as the argument to this option if required. This enables reproduction of the actual ordering used if and when an order sensitive problem is encountered. If --list-only is given, the tests that would be run are listed. This is useful when combined with --first, --exclude and/or --randomize to understand their impact. The test harness reports "Listed nn tests in ..." instead of "Ran nn tests in ..." when list mode is enabled. If the global option '--no-plugins' is given, plugins are not loaded before running the selftests. 
This has two effects: features provided or modified by plugins will not be tested, and tests provided by plugins will not be run. Tests that need working space on disk use a common temporary directory, typically inside $TMPDIR or /tmp. If you set BZR_TEST_PDB=1 when running selftest, failing tests will drop into a pdb postmortem session. The --coverage=DIRNAME global option produces a report with covered code indicated. :Examples: Run only tests relating to 'ignore':: bzr selftest ignore Disable plugins and list tests as they're run:: bzr --no-plugins selftest -v """ # NB: this is used from the class without creating an instance, which is # why it does not have a self parameter. def get_transport_type(typestring): """Parse and return a transport specifier.""" if typestring == "sftp": from bzrlib.tests import stub_sftp return stub_sftp.SFTPAbsoluteServer elif typestring == "memory": from bzrlib.tests import test_server return memory.MemoryServer elif typestring == "fakenfs": from bzrlib.tests import test_server return test_server.FakeNFSServer msg = "No known transport type %s. Supported types are: sftp\n" %\ (typestring) raise errors.BzrCommandError(msg) hidden = True takes_args = ['testspecs*'] takes_options = ['verbose', Option('one', help='Stop when one test fails.', short_name='1', ), Option('transport', help='Use a different transport by default ' 'throughout the test suite.', type=get_transport_type), Option('benchmark', help='Run the benchmarks rather than selftests.', hidden=True), Option('lsprof-timed', help='Generate lsprof output for benchmarked' ' sections of code.'), Option('lsprof-tests', help='Generate lsprof output for each test.'), Option('first', help='Run all tests, but run specified tests first.', short_name='f', ), Option('list-only', help='List the tests instead of running them.'), RegistryOption('parallel', help="Run the test suite in parallel.", lazy_registry=('bzrlib.tests', 'parallel_registry'), value_switches=False, ), Option('randomize', type=str, argname="SEED", help='Randomize the order of tests using the given' ' seed or "now" for the current time.'), ListOption('exclude', type=str, argname="PATTERN", short_name='x', help='Exclude tests that match this regular' ' expression.'), Option('subunit', help='Output test progress via subunit.'), Option('strict', help='Fail on missing dependencies or ' 'known failures.'), Option('load-list', type=str, argname='TESTLISTFILE', help='Load a test id list from a text file.'), ListOption('debugflag', type=str, short_name='E', help='Turn on a selftest debug flag.'), ListOption('starting-with', type=str, argname='TESTID', param_name='starting_with', short_name='s', help= 'Load only the tests starting with TESTID.'), Option('sync', help="By default we disable fsync and fdatasync" " while running the test suite.") ] encoding_type = 'replace' def __init__(self): Command.__init__(self) self.additional_selftest_args = {} def run(self, testspecs_list=None, verbose=False, one=False, transport=None, benchmark=None, lsprof_timed=None, first=False, list_only=False, randomize=None, exclude=None, strict=False, load_list=None, debugflag=None, starting_with=None, subunit=False, parallel=None, lsprof_tests=False, sync=False): # During selftest, disallow proxying, as it can cause severe # performance penalties and is only needed for thread # safety. The selftest command is assumed to not use threads # too heavily. The call should be as early as possible, as # error reporting for past duplicate imports won't have useful # backtraces. 
lazy_import.disallow_proxying() from bzrlib import tests if testspecs_list is not None: pattern = '|'.join(testspecs_list) else: pattern = ".*" if subunit: try: from bzrlib.tests import SubUnitBzrRunner except ImportError: raise errors.BzrCommandError(gettext("subunit not available. subunit " "needs to be installed to use --subunit.")) self.additional_selftest_args['runner_class'] = SubUnitBzrRunner # On Windows, disable automatic conversion of '\n' to '\r\n' in # stdout, which would corrupt the subunit stream. # FIXME: This has been fixed in subunit trunk (>0.0.5) so the # following code can be deleted when it's sufficiently deployed # -- vila/mgz 20100514 if (sys.platform == "win32" and getattr(sys.stdout, 'fileno', None) is not None): import msvcrt msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) if parallel: self.additional_selftest_args.setdefault( 'suite_decorators', []).append(parallel) if benchmark: raise errors.BzrCommandError(gettext( "--benchmark is no longer supported from bzr 2.2; " "use bzr-usertest instead")) test_suite_factory = None if not exclude: exclude_pattern = None else: exclude_pattern = '(' + '|'.join(exclude) + ')' if not sync: self._disable_fsync() selftest_kwargs = {"verbose": verbose, "pattern": pattern, "stop_on_failure": one, "transport": transport, "test_suite_factory": test_suite_factory, "lsprof_timed": lsprof_timed, "lsprof_tests": lsprof_tests, "matching_tests_first": first, "list_only": list_only, "random_seed": randomize, "exclude_pattern": exclude_pattern, "strict": strict, "load_list": load_list, "debug_flags": debugflag, "starting_with": starting_with } selftest_kwargs.update(self.additional_selftest_args) # Make deprecation warnings visible, unless -Werror is set cleanup = symbol_versioning.activate_deprecation_warnings( override=False) try: result = tests.selftest(**selftest_kwargs) finally: cleanup() return int(not result) def _disable_fsync(self): """Change the 'os' functionality to not synchronize.""" self._orig_fsync = getattr(os, 'fsync', None) if self._orig_fsync is not None: os.fsync = lambda filedes: None self._orig_fdatasync = getattr(os, 'fdatasync', None) if self._orig_fdatasync is not None: os.fdatasync = lambda filedes: None class cmd_version(Command): __doc__ = """Show version of bzr.""" encoding_type = 'replace' takes_options = [ Option("short", help="Print just the version number."), ] @display_command def run(self, short=False): from bzrlib.version import show_version if short: self.outf.write(bzrlib.version_string + '\n') else: show_version(to_file=self.outf) class cmd_rocks(Command): __doc__ = """Statement of optimism.""" hidden = True @display_command def run(self): self.outf.write(gettext("It sure does!\n")) class cmd_find_merge_base(Command): __doc__ = """Find and print a base revision for merging two branches.""" # TODO: Options to specify revisions on either side, as if # merging only part of the history. 
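# Editor's illustrative sketch, not part of bzrlib: the monkey-patching
# pattern used by _disable_fsync() above, shown with the matching restore
# step a caller would want.  The context-manager wrapper is a hypothetical
# addition, not something cmd_selftest provides.
import contextlib
import os

@contextlib.contextmanager
def _example_fsync_disabled():
    """Temporarily replace os.fsync/os.fdatasync with no-ops."""
    saved = {}
    for name in ('fsync', 'fdatasync'):
        original = getattr(os, name, None)
        if original is not None:
            saved[name] = original
            setattr(os, name, lambda fd: None)
    try:
        yield
    finally:
        # Put the real functions back so later code syncs as usual.
        for name, original in saved.items():
            setattr(os, name, original)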
takes_args = ['branch', 'other'] hidden = True @display_command def run(self, branch, other): from bzrlib.revision import ensure_null branch1 = Branch.open_containing(branch)[0] branch2 = Branch.open_containing(other)[0] self.add_cleanup(branch1.lock_read().unlock) self.add_cleanup(branch2.lock_read().unlock) last1 = ensure_null(branch1.last_revision()) last2 = ensure_null(branch2.last_revision()) graph = branch1.repository.get_graph(branch2.repository) base_rev_id = graph.find_unique_lca(last1, last2) self.outf.write(gettext('merge base is revision %s\n') % base_rev_id) class cmd_merge(Command): __doc__ = """Perform a three-way merge. The source of the merge can be specified either in the form of a branch, or in the form of a path to a file containing a merge directive generated with bzr send. If neither is specified, the default is the upstream branch or the branch most recently merged using --remember. The source of the merge may also be specified in the form of a path to a file in another branch: in this case, only the modifications to that file are merged into the current working tree. When merging from a branch, by default bzr will try to merge in all new work from the other branch, automatically determining an appropriate base revision. If this fails, you may need to give an explicit base. To pick a different ending revision, pass "--revision OTHER". bzr will try to merge in all new work up to and including revision OTHER. If you specify two values, "--revision BASE..OTHER", only revisions BASE through OTHER, excluding BASE but including OTHER, will be merged. If this causes some revisions to be skipped, i.e. if the destination branch does not already contain revision BASE, such a merge is commonly referred to as a "cherrypick". Unlike a normal merge, Bazaar does not currently track cherrypicks. The changes look like a normal commit, and the history of the changes from the other branch is not stored in the commit. Revision numbers are always relative to the source branch. Merge will do its best to combine the changes in two branches, but there are some kinds of problems only a human can fix. When it encounters those, it will mark a conflict. A conflict means that you need to fix something, before you can commit. Use bzr resolve when you have fixed a problem. See also bzr conflicts. If there is no default branch set, the first merge will set it (use --no-remember to avoid setting it). After that, you can omit the branch to use the default. To change the default, use --remember. The value will only be saved if the remote location can be accessed. The results of the merge are placed into the destination working directory, where they can be reviewed (with bzr diff), tested, and then committed to record the result of the merge. merge refuses to run if there are any uncommitted changes, unless --force is given. If --force is given, then the changes from the source will be merged with the current working tree, including any uncommitted changes in the tree. The --force option can also be used to create a merge revision which has more than two parents. If one would like to merge changes from the working tree of the other branch without merging any committed revisions, the --uncommitted option can be given. To select only some changes to merge, use "merge -i", which will prompt you to apply each diff hunk and file change, similar to "shelve". 
:Examples: To merge all new revisions from bzr.dev:: bzr merge ../bzr.dev To merge changes up to and including revision 82 from bzr.dev:: bzr merge -r 82 ../bzr.dev To merge the changes introduced by 82, without previous changes:: bzr merge -r 81..82 ../bzr.dev To apply a merge directive contained in /tmp/merge:: bzr merge /tmp/merge To create a merge revision with three parents from two branches feature1a and feature1b: bzr merge ../feature1a bzr merge ../feature1b --force bzr commit -m 'revision with three parents' """ encoding_type = 'exact' _see_also = ['update', 'remerge', 'status-flags', 'send'] takes_args = ['location?'] takes_options = [ 'change', 'revision', Option('force', help='Merge even if the destination tree has uncommitted changes.'), 'merge-type', 'reprocess', 'remember', Option('show-base', help="Show base revision text in " "conflicts."), Option('uncommitted', help='Apply uncommitted changes' ' from a working copy, instead of branch changes.'), Option('pull', help='If the destination is already' ' completely merged into the source, pull from the' ' source rather than merging. When this happens,' ' you do not need to commit the result.'), custom_help('directory', help='Branch to merge into, ' 'rather than the one containing the working directory.'), Option('preview', help='Instead of merging, show a diff of the' ' merge.'), Option('interactive', help='Select changes interactively.', short_name='i') ] def run(self, location=None, revision=None, force=False, merge_type=None, show_base=False, reprocess=None, remember=None, uncommitted=False, pull=False, directory=None, preview=False, interactive=False, ): if merge_type is None: merge_type = _mod_merge.Merge3Merger if directory is None: directory = u'.' possible_transports = [] merger = None allow_pending = True verified = 'inapplicable' tree = WorkingTree.open_containing(directory)[0] if tree.branch.revno() == 0: raise errors.BzrCommandError(gettext('Merging into empty branches not currently supported, ' 'https://bugs.launchpad.net/bzr/+bug/308562')) try: basis_tree = tree.revision_tree(tree.last_revision()) except errors.NoSuchRevision: basis_tree = tree.basis_tree() # die as quickly as possible if there are uncommitted changes if not force: if tree.has_changes(): raise errors.UncommittedChanges(tree) view_info = _get_view_info_for_change_reporter(tree) change_reporter = delta._ChangeReporter( unversioned_filter=tree.is_ignored, view_info=view_info) pb = ui.ui_factory.nested_progress_bar() self.add_cleanup(pb.finished) self.add_cleanup(tree.lock_write().unlock) if location is not None: try: mergeable = bundle.read_mergeable_from_url(location, possible_transports=possible_transports) except errors.NotABundle: mergeable = None else: if uncommitted: raise errors.BzrCommandError(gettext('Cannot use --uncommitted' ' with bundles or merge directives.')) if revision is not None: raise errors.BzrCommandError(gettext( 'Cannot use -r with merge directives or bundles')) merger, verified = _mod_merge.Merger.from_mergeable(tree, mergeable, None) if merger is None and uncommitted: if revision is not None and len(revision) > 0: raise errors.BzrCommandError(gettext('Cannot use --uncommitted and' ' --revision at the same time.')) merger = self.get_merger_from_uncommitted(tree, location, None) allow_pending = False if merger is None: merger, allow_pending = self._get_merger_from_branch(tree, location, revision, remember, possible_transports, None) merger.merge_type = merge_type merger.reprocess = reprocess merger.show_base = show_base 
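        # By this point ``merger`` has been built from one of three sources:
        # a merge directive or bundle read from ``location``, the uncommitted
        # changes of another working tree (--uncommitted), or a branch plus an
        # optional revision range.  The attributes set above only control how
        # the merge is performed.  A minimal sketch of driving the same API
        # directly (the sibling branch path is hypothetical; cleanups and
        # safety checks omitted):
        #
        #   wt = WorkingTree.open_containing(u'.')[0]
        #   other = Branch.open('../other-branch')
        #   merger = _mod_merge.Merger.from_revision_ids(
        #       None, wt, other.last_revision(), None, other, other)
        #   merger.merge_type = _mod_merge.Merge3Merger
        #   conflict_count = merger.do_merge()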
self.sanity_check_merger(merger) if (merger.base_rev_id == merger.other_rev_id and merger.other_rev_id is not None): # check if location is a nonexistent file (and not a branch) to # disambiguate the 'Nothing to do' if merger.interesting_files: if not merger.other_tree.has_filename( merger.interesting_files[0]): note(gettext("merger: ") + str(merger)) raise errors.PathsDoNotExist([location]) note(gettext('Nothing to do.')) return 0 if pull and not preview: if merger.interesting_files is not None: raise errors.BzrCommandError(gettext('Cannot pull individual files')) if (merger.base_rev_id == tree.last_revision()): result = tree.pull(merger.other_branch, False, merger.other_rev_id) result.report(self.outf) return 0 if merger.this_basis is None: raise errors.BzrCommandError(gettext( "This branch has no commits." " (perhaps you would prefer 'bzr pull')")) if preview: return self._do_preview(merger) elif interactive: return self._do_interactive(merger) else: return self._do_merge(merger, change_reporter, allow_pending, verified) def _get_preview(self, merger): tree_merger = merger.make_merger() tt = tree_merger.make_preview_transform() self.add_cleanup(tt.finalize) result_tree = tt.get_preview_tree() return result_tree def _do_preview(self, merger): from bzrlib.diff import show_diff_trees result_tree = self._get_preview(merger) path_encoding = osutils.get_diff_header_encoding() show_diff_trees(merger.this_tree, result_tree, self.outf, old_label='', new_label='', path_encoding=path_encoding) def _do_merge(self, merger, change_reporter, allow_pending, verified): merger.change_reporter = change_reporter conflict_count = merger.do_merge() if allow_pending: merger.set_pending() if verified == 'failed': warning('Preview patch does not match changes') if conflict_count != 0: return 1 else: return 0 def _do_interactive(self, merger): """Perform an interactive merge. This works by generating a preview tree of the merge, then using Shelver to selectively remove the differences between the working tree and the preview tree. """ from bzrlib import shelf_ui result_tree = self._get_preview(merger) writer = bzrlib.option.diff_writer_registry.get() shelver = shelf_ui.Shelver(merger.this_tree, result_tree, destroy=True, reporter=shelf_ui.ApplyReporter(), diff_writer=writer(sys.stdout)) try: shelver.run() finally: shelver.finalize() def sanity_check_merger(self, merger): if (merger.show_base and not merger.merge_type is _mod_merge.Merge3Merger): raise errors.BzrCommandError(gettext("Show-base is not supported for this" " merge type. 
%s") % merger.merge_type) if merger.reprocess is None: if merger.show_base: merger.reprocess = False else: # Use reprocess if the merger supports it merger.reprocess = merger.merge_type.supports_reprocess if merger.reprocess and not merger.merge_type.supports_reprocess: raise errors.BzrCommandError(gettext("Conflict reduction is not supported" " for merge type %s.") % merger.merge_type) if merger.reprocess and merger.show_base: raise errors.BzrCommandError(gettext("Cannot do conflict reduction and" " show base.")) def _get_merger_from_branch(self, tree, location, revision, remember, possible_transports, pb): """Produce a merger from a location, assuming it refers to a branch.""" from bzrlib.tag import _merge_tags_if_possible # find the branch locations other_loc, user_location = self._select_branch_location(tree, location, revision, -1) if revision is not None and len(revision) == 2: base_loc, _unused = self._select_branch_location(tree, location, revision, 0) else: base_loc = other_loc # Open the branches other_branch, other_path = Branch.open_containing(other_loc, possible_transports) if base_loc == other_loc: base_branch = other_branch else: base_branch, base_path = Branch.open_containing(base_loc, possible_transports) # Find the revision ids other_revision_id = None base_revision_id = None if revision is not None: if len(revision) >= 1: other_revision_id = revision[-1].as_revision_id(other_branch) if len(revision) == 2: base_revision_id = revision[0].as_revision_id(base_branch) if other_revision_id is None: other_revision_id = _mod_revision.ensure_null( other_branch.last_revision()) # Remember where we merge from. We need to remember if: # - user specify a location (and we don't merge from the parent # branch) # - user ask to remember or there is no previous location set to merge # from and user didn't ask to *not* remember if (user_location is not None and ((remember or (remember is None and tree.branch.get_submit_branch() is None)))): tree.branch.set_submit_branch(other_branch.base) # Merge tags (but don't set them in the master branch yet, the user # might revert this merge). Commit will propagate them. _merge_tags_if_possible(other_branch, tree.branch, ignore_master=True) merger = _mod_merge.Merger.from_revision_ids(pb, tree, other_revision_id, base_revision_id, other_branch, base_branch) if other_path != '': allow_pending = False merger.interesting_files = [other_path] else: allow_pending = True return merger, allow_pending def get_merger_from_uncommitted(self, tree, location, pb): """Get a merger for uncommitted changes. :param tree: The tree the merger should apply to. :param location: The location containing uncommitted changes. :param pb: The progress bar to use for showing progress. """ location = self._select_branch_location(tree, location)[0] other_tree, other_path = WorkingTree.open_containing(location) merger = _mod_merge.Merger.from_uncommitted(tree, other_tree, pb) if other_path != '': merger.interesting_files = [other_path] return merger def _select_branch_location(self, tree, user_location, revision=None, index=None): """Select a branch location, according to possible inputs. If provided, branches from ``revision`` are preferred. (Both ``revision`` and ``index`` must be supplied.) Otherwise, the ``location`` parameter is used. If it is None, then the ``submit`` or ``parent`` location is used, and a note is printed. 
:param tree: The working tree to select a branch for merging into :param location: The location entered by the user :param revision: The revision parameter to the command :param index: The index to use for the revision parameter. Negative indices are permitted. :return: (selected_location, user_location). The default location will be the user-entered location. """ if (revision is not None and index is not None and revision[index] is not None): branch = revision[index].get_branch() if branch is not None: return branch, branch if user_location is None: location = self._get_remembered(tree, 'Merging from') else: location = user_location return location, user_location def _get_remembered(self, tree, verb_string): """Use tree.branch's parent if none was supplied. Report if the remembered location was used. """ stored_location = tree.branch.get_submit_branch() stored_location_type = "submit" if stored_location is None: stored_location = tree.branch.get_parent() stored_location_type = "parent" mutter("%s", stored_location) if stored_location is None: raise errors.BzrCommandError(gettext("No location specified or remembered")) display_url = urlutils.unescape_for_display(stored_location, 'utf-8') note(gettext("{0} remembered {1} location {2}").format(verb_string, stored_location_type, display_url)) return stored_location class cmd_remerge(Command): __doc__ = """Redo a merge. Use this if you want to try a different merge technique while resolving conflicts. Some merge techniques are better than others, and remerge lets you try different ones on different files. The options for remerge have the same meaning and defaults as the ones for merge. The difference is that remerge can (only) be run when there is a pending merge, and it lets you specify particular files. :Examples: Re-do the merge of all conflicted files, and show the base text in conflict regions, in addition to the usual THIS and OTHER texts:: bzr remerge --show-base Re-do the merge of "foobar", using the weave merge algorithm, with additional processing to reduce the size of conflict regions:: bzr remerge --merge-type weave --reprocess foobar """ takes_args = ['file*'] takes_options = [ 'merge-type', 'reprocess', Option('show-base', help="Show base revision text in conflicts."), ] def run(self, file_list=None, merge_type=None, show_base=False, reprocess=False): from bzrlib.conflicts import restore if merge_type is None: merge_type = _mod_merge.Merge3Merger tree, file_list = WorkingTree.open_containing_paths(file_list) self.add_cleanup(tree.lock_write().unlock) parents = tree.get_parent_ids() if len(parents) != 2: raise errors.BzrCommandError(gettext("Sorry, remerge only works after normal" " merges. 
Not cherrypicking or"
            " multi-merges."))
        repository = tree.branch.repository
        interesting_ids = None
        new_conflicts = []
        conflicts = tree.conflicts()
        if file_list is not None:
            interesting_ids = set()
            for filename in file_list:
                file_id = tree.path2id(filename)
                if file_id is None:
                    raise errors.NotVersionedError(filename)
                interesting_ids.add(file_id)
                if tree.kind(file_id) != "directory":
                    continue
                # FIXME: Support nested trees
                for name, ie in tree.root_inventory.iter_entries(file_id):
                    interesting_ids.add(ie.file_id)
            new_conflicts = conflicts.select_conflicts(tree, file_list)[0]
        else:
            # Remerge only supports resolving contents conflicts
            allowed_conflicts = ('text conflict', 'contents conflict')
            restore_files = [c.path for c in conflicts
                             if c.typestring in allowed_conflicts]
        _mod_merge.transform_tree(tree, tree.basis_tree(), interesting_ids)
        tree.set_conflicts(ConflictList(new_conflicts))
        if file_list is not None:
            restore_files = file_list
        for filename in restore_files:
            try:
                restore(tree.abspath(filename))
            except errors.NotConflicted:
                pass
        # Disable pending merges, because the file texts we are remerging
        # have not had those merges performed.  If we use the wrong parents
        # list, we imply that the working tree text has seen and rejected
        # all the changes from the other tree, when in fact those changes
        # have not yet been seen.
        tree.set_parent_ids(parents[:1])
        try:
            merger = _mod_merge.Merger.from_revision_ids(None, tree, parents[1])
            merger.interesting_ids = interesting_ids
            merger.merge_type = merge_type
            merger.show_base = show_base
            merger.reprocess = reprocess
            conflicts = merger.do_merge()
        finally:
            tree.set_parent_ids(parents)
        if conflicts > 0:
            return 1
        else:
            return 0


class cmd_revert(Command):
    __doc__ = """\
    Set files in the working tree back to the contents of a previous revision.

    Giving a list of files will revert only those files.  Otherwise, all files
    will be reverted.  If the revision is not specified with '--revision', the
    working tree basis revision is used.  A revert operation affects only the
    working tree, not any revision history like the branch and repository or
    the working tree basis revision.

    To remove only some changes, without reverting to a prior version, use
    merge instead.  For example, "merge . -r -2..-3" (don't forget the ".")
    will remove the changes introduced by the second last commit (-2), without
    affecting the changes introduced by the last commit (-1).  To remove
    certain changes on a hunk-by-hunk basis, see the shelve command.
    To update the branch to a specific revision or the latest revision and
    update the working tree accordingly while preserving local changes, see
    the update command.

    Uncommitted changes to files that are reverted will be discarded.
    However, by default, any files that have been manually changed will be
    backed up first.  (Files changed only by merge are not backed up.)
    Backup files have '.~#~' appended to their name, where # is a number.

    When you provide files, you can use their current pathname or the pathname
    from the target revision.  So you can use revert to "undelete" a file by
    name.  If you name a directory, all the contents of that directory will be
    reverted.

    If you have newly added files since the target revision, they will be
    removed.  If the files to be removed have been changed, backups will be
    created as above.  Directories containing unknown files will not be
    deleted.

    The working tree contains a list of revisions that have been merged but
    not yet committed.  These revisions will be included as additional parents
    of the next commit.
Normally, using revert clears that list as well as reverting the files. If any files are specified, revert leaves the list of uncommitted merges alone and reverts only the files. Use ``bzr revert .`` in the tree root to revert all files but keep the recorded merges, and ``bzr revert --forget-merges`` to clear the pending merge list without reverting any files. Using "bzr revert --forget-merges", it is possible to apply all of the changes from a branch in a single revision. To do this, perform the merge as desired. Then doing revert with the "--forget-merges" option will keep the content of the tree as it was, but it will clear the list of pending merges. The next commit will then contain all of the changes that are present in the other branch, but without any other parent revisions. Because this technique forgets where these changes originated, it may cause additional conflicts on later merges involving the same source and target branches. """ _see_also = ['cat', 'export', 'merge', 'shelve'] takes_options = [ 'revision', Option('no-backup', "Do not save backups of reverted files."), Option('forget-merges', 'Remove pending merge marker, without changing any files.'), ] takes_args = ['file*'] def run(self, revision=None, no_backup=False, file_list=None, forget_merges=None): tree, file_list = WorkingTree.open_containing_paths(file_list) self.add_cleanup(tree.lock_tree_write().unlock) if forget_merges: tree.set_parent_ids(tree.get_parent_ids()[:1]) else: self._revert_tree_to_revision(tree, revision, file_list, no_backup) @staticmethod def _revert_tree_to_revision(tree, revision, file_list, no_backup): rev_tree = _get_one_revision_tree('revert', revision, tree=tree) tree.revert(file_list, rev_tree, not no_backup, None, report_changes=True) class cmd_assert_fail(Command): __doc__ = """Test reporting of assertion failures""" # intended just for use in testing hidden = True def run(self): raise AssertionError("always fails") class cmd_help(Command): __doc__ = """Show help on a command or other topic. """ _see_also = ['topics'] takes_options = [ Option('long', 'Show help on all commands.'), ] takes_args = ['topic?'] aliases = ['?', '--help', '-?', '-h'] @display_command def run(self, topic=None, long=False): import bzrlib.help if topic is None and long: topic = "commands" bzrlib.help.help(topic) class cmd_shell_complete(Command): __doc__ = """Show appropriate completions for context. For a list of all available commands, say 'bzr shell-complete'. """ takes_args = ['context?'] aliases = ['s-c'] hidden = True @display_command def run(self, context=None): from bzrlib import shellcomplete shellcomplete.shellcomplete(context) class cmd_missing(Command): __doc__ = """Show unmerged/unpulled revisions between two branches. OTHER_BRANCH may be local or remote. To filter on a range of revisions, you can use the command -r begin..end -r revision requests a specific revision, -r ..end or -r begin.. are also valid. 
:Exit values: 1 - some missing revisions 0 - no missing revisions :Examples: Determine the missing revisions between this and the branch at the remembered pull location:: bzr missing Determine the missing revisions between this and another branch:: bzr missing http://server/branch Determine the missing revisions up to a specific revision on the other branch:: bzr missing -r ..-10 Determine the missing revisions up to a specific revision on this branch:: bzr missing --my-revision ..-10 """ _see_also = ['merge', 'pull'] takes_args = ['other_branch?'] takes_options = [ 'directory', Option('reverse', 'Reverse the order of revisions.'), Option('mine-only', 'Display changes in the local branch only.'), Option('this' , 'Same as --mine-only.'), Option('theirs-only', 'Display changes in the remote branch only.'), Option('other', 'Same as --theirs-only.'), 'log-format', 'show-ids', 'verbose', custom_help('revision', help='Filter on other branch revisions (inclusive). ' 'See "help revisionspec" for details.'), Option('my-revision', type=_parse_revision_str, help='Filter on local branch revisions (inclusive). ' 'See "help revisionspec" for details.'), Option('include-merged', 'Show all revisions in addition to the mainline ones.'), Option('include-merges', hidden=True, help='Historical alias for --include-merged.'), ] encoding_type = 'replace' @display_command def run(self, other_branch=None, reverse=False, mine_only=False, theirs_only=False, log_format=None, long=False, short=False, line=False, show_ids=False, verbose=False, this=False, other=False, include_merged=None, revision=None, my_revision=None, directory=u'.', include_merges=symbol_versioning.DEPRECATED_PARAMETER): from bzrlib.missing import find_unmerged, iter_log_revisions def message(s): if not is_quiet(): self.outf.write(s) if symbol_versioning.deprecated_passed(include_merges): ui.ui_factory.show_user_warning( 'deprecated_command_option', deprecated_name='--include-merges', recommended_name='--include-merged', deprecated_in_version='2.5', command=self.invoked_as) if include_merged is None: include_merged = include_merges else: raise errors.BzrCommandError(gettext( '{0} and {1} are mutually exclusive').format( '--include-merges', '--include-merged')) if include_merged is None: include_merged = False if this: mine_only = this if other: theirs_only = other # TODO: We should probably check that we don't have mine-only and # theirs-only set, but it gets complicated because we also have # this and other which could be used. 
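        # The comparison itself is delegated to bzrlib.missing.find_unmerged(),
        # which walks both branches and returns the revisions unique to each
        # side.  A minimal standalone sketch of the same check, assuming a
        # hypothetical remote URL (locking kept deliberately simple):
        #
        #   from bzrlib.branch import Branch
        #   from bzrlib.missing import find_unmerged
        #   local = Branch.open_containing(u'.')[0]
        #   remote = Branch.open('http://server/branch')
        #   local.lock_read()
        #   remote.lock_read()
        #   try:
        #       local_extra, remote_extra = find_unmerged(local, remote, 'all')
        #   finally:
        #       remote.unlock()
        #       local.unlock()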
restrict = 'all' if mine_only: restrict = 'local' elif theirs_only: restrict = 'remote' local_branch = Branch.open_containing(directory)[0] self.add_cleanup(local_branch.lock_read().unlock) parent = local_branch.get_parent() if other_branch is None: other_branch = parent if other_branch is None: raise errors.BzrCommandError(gettext("No peer location known" " or specified.")) display_url = urlutils.unescape_for_display(parent, self.outf.encoding) message(gettext("Using saved parent location: {0}\n").format( display_url)) remote_branch = Branch.open(other_branch) if remote_branch.base == local_branch.base: remote_branch = local_branch else: self.add_cleanup(remote_branch.lock_read().unlock) local_revid_range = _revision_range_to_revid_range( _get_revision_range(my_revision, local_branch, self.name())) remote_revid_range = _revision_range_to_revid_range( _get_revision_range(revision, remote_branch, self.name())) local_extra, remote_extra = find_unmerged( local_branch, remote_branch, restrict, backward=not reverse, include_merged=include_merged, local_revid_range=local_revid_range, remote_revid_range=remote_revid_range) if log_format is None: registry = log.log_formatter_registry log_format = registry.get_default(local_branch) lf = log_format(to_file=self.outf, show_ids=show_ids, show_timezone='original') status_code = 0 if local_extra and not theirs_only: message(ngettext("You have %d extra revision:\n", "You have %d extra revisions:\n", len(local_extra)) % len(local_extra)) rev_tag_dict = {} if local_branch.supports_tags(): rev_tag_dict = local_branch.tags.get_reverse_tag_dict() for revision in iter_log_revisions(local_extra, local_branch.repository, verbose, rev_tag_dict): lf.log_revision(revision) printed_local = True status_code = 1 else: printed_local = False if remote_extra and not mine_only: if printed_local is True: message("\n\n\n") message(ngettext("You are missing %d revision:\n", "You are missing %d revisions:\n", len(remote_extra)) % len(remote_extra)) if remote_branch.supports_tags(): rev_tag_dict = remote_branch.tags.get_reverse_tag_dict() for revision in iter_log_revisions(remote_extra, remote_branch.repository, verbose, rev_tag_dict): lf.log_revision(revision) status_code = 1 if mine_only and not local_extra: # We checked local, and found nothing extra message(gettext('This branch has no new revisions.\n')) elif theirs_only and not remote_extra: # We checked remote, and found nothing extra message(gettext('Other branch has no new revisions.\n')) elif not (mine_only or theirs_only or local_extra or remote_extra): # We checked both branches, and neither one had extra # revisions message(gettext("Branches are up to date.\n")) self.cleanup_now() if not status_code and parent is None and other_branch is not None: self.add_cleanup(local_branch.lock_write().unlock) # handle race conditions - a parent might be set while we run. if local_branch.get_parent() is None: local_branch.set_parent(remote_branch.base) return status_code class cmd_pack(Command): __doc__ = """Compress the data within a repository. This operation compresses the data within a bazaar repository. As bazaar supports automatic packing of repository, this operation is normally not required to be done manually. During the pack operation, bazaar takes a backup of existing repository data, i.e. pack files. This backup is eventually removed by bazaar automatically when it is safe to do so. To save disk space by removing the backed up pack files, the --clean-obsolete-packs option may be used. 
Warning: If you use --clean-obsolete-packs and your machine crashes during or immediately after repacking, you may be left with a state where the deletion has been written to disk but the new packs have not been. In this case the repository may be unusable. """ _see_also = ['repositories'] takes_args = ['branch_or_repo?'] takes_options = [ Option('clean-obsolete-packs', 'Delete obsolete packs to save disk space.'), ] def run(self, branch_or_repo='.', clean_obsolete_packs=False): dir = controldir.ControlDir.open_containing(branch_or_repo)[0] try: branch = dir.open_branch() repository = branch.repository except errors.NotBranchError: repository = dir.open_repository() repository.pack(clean_obsolete_packs=clean_obsolete_packs) class cmd_plugins(Command): __doc__ = """List the installed plugins. This command displays the list of installed plugins including version of plugin and a short description of each. --verbose shows the path where each plugin is located. A plugin is an external component for Bazaar that extends the revision control system, by adding or replacing code in Bazaar. Plugins can do a variety of things, including overriding commands, adding new commands, providing additional network transports and customizing log output. See the Bazaar Plugin Guide for further information on plugins including where to find them and how to install them. Instructions are also provided there on how to write new plugins using the Python programming language. """ takes_options = ['verbose'] @display_command def run(self, verbose=False): from bzrlib import plugin # Don't give writelines a generator as some codecs don't like that self.outf.writelines( list(plugin.describe_plugins(show_paths=verbose))) class cmd_testament(Command): __doc__ = """Show testament (signing-form) of a revision.""" takes_options = [ 'revision', Option('long', help='Produce long-format testament.'), Option('strict', help='Produce a strict-format testament.')] takes_args = ['branch?'] encoding_type = 'exact' @display_command def run(self, branch=u'.', revision=None, long=False, strict=False): from bzrlib.testament import Testament, StrictTestament if strict is True: testament_class = StrictTestament else: testament_class = Testament if branch == '.': b = Branch.open_containing(branch)[0] else: b = Branch.open(branch) self.add_cleanup(b.lock_read().unlock) if revision is None: rev_id = b.last_revision() else: rev_id = revision[0].as_revision_id(b) t = testament_class.from_revision(b.repository, rev_id) if long: self.outf.writelines(t.as_text_lines()) else: self.outf.write(t.as_short_text()) class cmd_annotate(Command): __doc__ = """Show the origin of each line in a file. This prints out the given file with an annotation on the left side indicating which revision, author and date introduced the change. If the origin is the same for a run of consecutive lines, it is shown only at the top, unless the --all option is given. 
""" # TODO: annotate directories; showing when each file was last changed # TODO: if the working copy is modified, show annotations on that # with new uncommitted lines marked aliases = ['ann', 'blame', 'praise'] takes_args = ['filename'] takes_options = [Option('all', help='Show annotations on all lines.'), Option('long', help='Show commit date in annotations.'), 'revision', 'show-ids', 'directory', ] encoding_type = 'exact' @display_command def run(self, filename, all=False, long=False, revision=None, show_ids=False, directory=None): from bzrlib.annotate import ( annotate_file_tree, ) wt, branch, relpath = \ _open_directory_or_containing_tree_or_branch(filename, directory) if wt is not None: self.add_cleanup(wt.lock_read().unlock) else: self.add_cleanup(branch.lock_read().unlock) tree = _get_one_revision_tree('annotate', revision, branch=branch) self.add_cleanup(tree.lock_read().unlock) if wt is not None and revision is None: file_id = wt.path2id(relpath) else: file_id = tree.path2id(relpath) if file_id is None: raise errors.NotVersionedError(filename) if wt is not None and revision is None: # If there is a tree and we're not annotating historical # versions, annotate the working tree's content. annotate_file_tree(wt, file_id, self.outf, long, all, show_ids=show_ids) else: annotate_file_tree(tree, file_id, self.outf, long, all, show_ids=show_ids, branch=branch) class cmd_re_sign(Command): __doc__ = """Create a digital signature for an existing revision.""" # TODO be able to replace existing ones. hidden = True # is this right ? takes_args = ['revision_id*'] takes_options = ['directory', 'revision'] def run(self, revision_id_list=None, revision=None, directory=u'.'): if revision_id_list is not None and revision is not None: raise errors.BzrCommandError(gettext('You can only supply one of revision_id or --revision')) if revision_id_list is None and revision is None: raise errors.BzrCommandError(gettext('You must supply either --revision or a revision_id')) b = WorkingTree.open_containing(directory)[0].branch self.add_cleanup(b.lock_write().unlock) return self._run(b, revision_id_list, revision) def _run(self, b, revision_id_list, revision): import bzrlib.gpg as gpg gpg_strategy = gpg.GPGStrategy(b.get_config_stack()) if revision_id_list is not None: b.repository.start_write_group() try: for revision_id in revision_id_list: b.repository.sign_revision(revision_id, gpg_strategy) except: b.repository.abort_write_group() raise else: b.repository.commit_write_group() elif revision is not None: if len(revision) == 1: revno, rev_id = revision[0].in_history(b) b.repository.start_write_group() try: b.repository.sign_revision(rev_id, gpg_strategy) except: b.repository.abort_write_group() raise else: b.repository.commit_write_group() elif len(revision) == 2: # are they both on rh- if so we can walk between them # might be nice to have a range helper for arbitrary # revision paths. hmm. 
from_revno, from_revid = revision[0].in_history(b) to_revno, to_revid = revision[1].in_history(b) if to_revid is None: to_revno = b.revno() if from_revno is None or to_revno is None: raise errors.BzrCommandError(gettext('Cannot sign a range of non-revision-history revisions')) b.repository.start_write_group() try: for revno in range(from_revno, to_revno + 1): b.repository.sign_revision(b.get_rev_id(revno), gpg_strategy) except: b.repository.abort_write_group() raise else: b.repository.commit_write_group() else: raise errors.BzrCommandError(gettext('Please supply either one revision, or a range.')) class cmd_bind(Command): __doc__ = """Convert the current branch into a checkout of the supplied branch. If no branch is supplied, rebind to the last bound location. Once converted into a checkout, commits must succeed on the master branch before they will be applied to the local branch. Bound branches use the nickname of its master branch unless it is set locally, in which case binding will update the local nickname to be that of the master. """ _see_also = ['checkouts', 'unbind'] takes_args = ['location?'] takes_options = ['directory'] def run(self, location=None, directory=u'.'): b, relpath = Branch.open_containing(directory) if location is None: try: location = b.get_old_bound_location() except errors.UpgradeRequired: raise errors.BzrCommandError(gettext('No location supplied. ' 'This format does not remember old locations.')) else: if location is None: if b.get_bound_location() is not None: raise errors.BzrCommandError( gettext('Branch is already bound')) else: raise errors.BzrCommandError( gettext('No location supplied' ' and no previous location known')) b_other = Branch.open(location) try: b.bind(b_other) except errors.DivergedBranches: raise errors.BzrCommandError(gettext('These branches have diverged.' ' Try merging, and then bind again.')) if b.get_config().has_explicit_nickname(): b.nick = b_other.nick class cmd_unbind(Command): __doc__ = """Convert the current checkout into a regular branch. After unbinding, the local branch is considered independent and subsequent commits will be local only. """ _see_also = ['checkouts', 'bind'] takes_args = [] takes_options = ['directory'] def run(self, directory=u'.'): b, relpath = Branch.open_containing(directory) if not b.unbind(): raise errors.BzrCommandError(gettext('Local branch is not bound')) class cmd_uncommit(Command): __doc__ = """Remove the last committed revision. --verbose will print out what is being removed. --dry-run will go through all the motions, but not actually remove anything. If --revision is specified, uncommit revisions to leave the branch at the specified revision. For example, "bzr uncommit -r 15" will leave the branch at revision 15. Uncommit leaves the working tree ready for a new commit. The only change it may make is to restore any pending merges that were present before the commit. """ # TODO: jam 20060108 Add an option to allow uncommit to remove # unreferenced information in 'branch-as-repository' branches. # TODO: jam 20060108 Add the ability for uncommit to remove unreferenced # information in shared branches as well. _see_also = ['commit'] takes_options = ['verbose', 'revision', Option('dry-run', help='Don\'t actually make changes.'), Option('force', help='Say yes to all questions.'), Option('keep-tags', help='Keep tags that point to removed revisions.'), Option('local', help="Only remove the commits from the local branch" " when in a checkout." 
), ] takes_args = ['location?'] aliases = [] encoding_type = 'replace' def run(self, location=None, dry_run=False, verbose=False, revision=None, force=False, local=False, keep_tags=False): if location is None: location = u'.' control, relpath = controldir.ControlDir.open_containing(location) try: tree = control.open_workingtree() b = tree.branch except (errors.NoWorkingTree, errors.NotLocalUrl): tree = None b = control.open_branch() if tree is not None: self.add_cleanup(tree.lock_write().unlock) else: self.add_cleanup(b.lock_write().unlock) return self._run(b, tree, dry_run, verbose, revision, force, local, keep_tags) def _run(self, b, tree, dry_run, verbose, revision, force, local, keep_tags): from bzrlib.log import log_formatter, show_log from bzrlib.uncommit import uncommit last_revno, last_rev_id = b.last_revision_info() rev_id = None if revision is None: revno = last_revno rev_id = last_rev_id else: # 'bzr uncommit -r 10' actually means uncommit # so that the final tree is at revno 10. # but bzrlib.uncommit.uncommit() actually uncommits # the revisions that are supplied. # So we need to offset it by one revno = revision[0].in_history(b).revno + 1 if revno <= last_revno: rev_id = b.get_rev_id(revno) if rev_id is None or _mod_revision.is_null(rev_id): self.outf.write(gettext('No revisions to uncommit.\n')) return 1 lf = log_formatter('short', to_file=self.outf, show_timezone='original') show_log(b, lf, verbose=False, direction='forward', start_revision=revno, end_revision=last_revno) if dry_run: self.outf.write(gettext('Dry-run, pretending to remove' ' the above revisions.\n')) else: self.outf.write(gettext('The above revision(s) will be removed.\n')) if not force: if not ui.ui_factory.confirm_action( gettext(u'Uncommit these revisions'), 'bzrlib.builtins.uncommit', {}): self.outf.write(gettext('Canceled\n')) return 0 mutter('Uncommitting from {%s} to {%s}', last_rev_id, rev_id) uncommit(b, tree=tree, dry_run=dry_run, verbose=verbose, revno=revno, local=local, keep_tags=keep_tags) self.outf.write(gettext('You can restore the old tip by running:\n' ' bzr pull . -r revid:%s\n') % last_rev_id) class cmd_break_lock(Command): __doc__ = """Break a dead lock. This command breaks a lock on a repository, branch, working directory or config file. CAUTION: Locks should only be broken when you are sure that the process holding the lock has been stopped. You can get information on what locks are open via the 'bzr info [location]' command. :Examples: bzr break-lock bzr break-lock bzr+ssh://example.com/bzr/foo bzr break-lock --conf ~/.bazaar """ takes_args = ['location?'] takes_options = [ Option('config', help='LOCATION is the directory where the config lock is.'), Option('force', help='Do not ask for confirmation before breaking the lock.'), ] def run(self, location=None, config=False, force=False): if location is None: location = u'.' if force: ui.ui_factory = ui.ConfirmationUserInterfacePolicy(ui.ui_factory, None, {'bzrlib.lockdir.break': True}) if config: conf = _mod_config.LockableConfig(file_name=location) conf.break_lock() else: control, relpath = controldir.ControlDir.open_containing(location) try: control.break_lock() except NotImplementedError: pass class cmd_wait_until_signalled(Command): __doc__ = """Test helper for test_start_and_stop_bzr_subprocess_send_signal. This just prints a line to signal when it is ready, then blocks on stdin. 
""" hidden = True def run(self): sys.stdout.write("running\n") sys.stdout.flush() sys.stdin.readline() class cmd_serve(Command): __doc__ = """Run the bzr server.""" aliases = ['server'] takes_options = [ Option('inet', help='Serve on stdin/out for use from inetd or sshd.'), RegistryOption('protocol', help="Protocol to serve.", lazy_registry=('bzrlib.transport', 'transport_server_registry'), value_switches=True), Option('listen', help='Listen for connections on nominated address.', type=str), Option('port', help='Listen for connections on nominated port. Passing 0 as ' 'the port number will result in a dynamically allocated ' 'port. The default port depends on the protocol.', type=int), custom_help('directory', help='Serve contents of this directory.'), Option('allow-writes', help='By default the server is a readonly server. Supplying ' '--allow-writes enables write access to the contents of ' 'the served directory and below. Note that ``bzr serve`` ' 'does not perform authentication, so unless some form of ' 'external authentication is arranged supplying this ' 'option leads to global uncontrolled write access to your ' 'file system.' ), Option('client-timeout', type=float, help='Override the default idle client timeout (5min).'), ] def run(self, listen=None, port=None, inet=False, directory=None, allow_writes=False, protocol=None, client_timeout=None): from bzrlib import transport if directory is None: directory = os.getcwd() if protocol is None: protocol = transport.transport_server_registry.get() url = transport.location_to_url(directory) if not allow_writes: url = 'readonly+' + url t = transport.get_transport_from_url(url) protocol(t, listen, port, inet, client_timeout) class cmd_join(Command): __doc__ = """Combine a tree into its containing tree. This command requires the target tree to be in a rich-root format. The TREE argument should be an independent tree, inside another tree, but not part of it. (Such trees can be produced by "bzr split", but also by running "bzr branch" with the target inside a tree.) The result is a combined tree, with the subtree no longer an independent part. This is marked as a merge of the subtree into the containing tree, and all history is preserved. """ _see_also = ['split'] takes_args = ['tree'] takes_options = [ Option('reference', help='Join by reference.', hidden=True), ] def run(self, tree, reference=False): sub_tree = WorkingTree.open(tree) parent_dir = osutils.dirname(sub_tree.basedir) containing_tree = WorkingTree.open_containing(parent_dir)[0] repo = containing_tree.branch.repository if not repo.supports_rich_root(): raise errors.BzrCommandError(gettext( "Can't join trees because %s doesn't support rich root data.\n" "You can use bzr upgrade on the repository.") % (repo,)) if reference: try: containing_tree.add_reference(sub_tree) except errors.BadReferenceTarget, e: # XXX: Would be better to just raise a nicely printable # exception from the real origin. Also below. mbp 20070306 raise errors.BzrCommandError( gettext("Cannot join {0}. {1}").format(tree, e.reason)) else: try: containing_tree.subsume(sub_tree) except errors.BadSubsumeSource, e: raise errors.BzrCommandError( gettext("Cannot join {0}. {1}").format(tree, e.reason)) class cmd_split(Command): __doc__ = """Split a subdirectory of a tree into a separate tree. This command will produce a target tree in a format that supports rich roots, like 'rich-root' or 'rich-root-pack'. These formats cannot be converted into earlier formats like 'dirstate-tags'. 
The TREE argument should be a subdirectory of a working tree. That subdirectory will be converted into an independent tree, with its own branch. Commits in the top-level tree will not apply to the new subtree. """ _see_also = ['join'] takes_args = ['tree'] def run(self, tree): containing_tree, subdir = WorkingTree.open_containing(tree) sub_id = containing_tree.path2id(subdir) if sub_id is None: raise errors.NotVersionedError(subdir) try: containing_tree.extract(sub_id) except errors.RootNotRich: raise errors.RichRootUpgradeRequired(containing_tree.branch.base) class cmd_merge_directive(Command): __doc__ = """Generate a merge directive for auto-merge tools. A directive requests a merge to be performed, and also provides all the information necessary to do so. This means it must either include a revision bundle, or the location of a branch containing the desired revision. A submit branch (the location to merge into) must be supplied the first time the command is issued. After it has been supplied once, it will be remembered as the default. A public branch is optional if a revision bundle is supplied, but required if --diff or --plain is specified. It will be remembered as the default after the first use. """ takes_args = ['submit_branch?', 'public_branch?'] hidden = True _see_also = ['send'] takes_options = [ 'directory', RegistryOption.from_kwargs('patch-type', 'The type of patch to include in the directive.', title='Patch type', value_switches=True, enum_switch=False, bundle='Bazaar revision bundle (default).', diff='Normal unified diff.', plain='No patch, just directive.'), Option('sign', help='GPG-sign the directive.'), 'revision', Option('mail-to', type=str, help='Instead of printing the directive, email to this address.'), Option('message', type=str, short_name='m', help='Message to use when committing this merge.') ] encoding_type = 'exact' def run(self, submit_branch=None, public_branch=None, patch_type='bundle', sign=False, revision=None, mail_to=None, message=None, directory=u'.'): from bzrlib.revision import ensure_null, NULL_REVISION include_patch, include_bundle = { 'plain': (False, False), 'diff': (True, False), 'bundle': (True, True), }[patch_type] branch = Branch.open(directory) stored_submit_branch = branch.get_submit_branch() if submit_branch is None: submit_branch = stored_submit_branch else: if stored_submit_branch is None: branch.set_submit_branch(submit_branch) if submit_branch is None: submit_branch = branch.get_parent() if submit_branch is None: raise errors.BzrCommandError(gettext('No submit branch specified or known')) stored_public_branch = branch.get_public_branch() if public_branch is None: public_branch = stored_public_branch elif stored_public_branch is None: # FIXME: Should be done only if we succeed ? 
-- vila 2012-01-03
            branch.set_public_branch(public_branch)
        if not include_bundle and public_branch is None:
            raise errors.BzrCommandError(gettext('No public branch specified or'
                                                 ' known'))
        base_revision_id = None
        if revision is not None:
            if len(revision) > 2:
                raise errors.BzrCommandError(gettext('bzr merge-directive takes '
                    'at most two revision identifiers'))
            revision_id = revision[-1].as_revision_id(branch)
            if len(revision) == 2:
                base_revision_id = revision[0].as_revision_id(branch)
        else:
            revision_id = branch.last_revision()
        revision_id = ensure_null(revision_id)
        if revision_id == NULL_REVISION:
            raise errors.BzrCommandError(gettext('No revisions to bundle.'))
        directive = merge_directive.MergeDirective2.from_objects(
            branch.repository, revision_id, time.time(),
            osutils.local_time_offset(), submit_branch,
            public_branch=public_branch, include_patch=include_patch,
            include_bundle=include_bundle, message=message,
            base_revision_id=base_revision_id)
        if mail_to is None:
            if sign:
                self.outf.write(directive.to_signed(branch))
            else:
                self.outf.writelines(directive.to_lines())
        else:
            message = directive.to_email(mail_to, branch, sign)
            s = SMTPConnection(branch.get_config_stack())
            s.send_email(message)


class cmd_send(Command):
    __doc__ = """Mail or create a merge-directive for submitting changes.

    A merge directive provides many things needed for requesting merges:

    * A machine-readable description of the merge to perform

    * An optional patch that is a preview of the changes requested

    * An optional bundle of revision data, so that the changes can be applied
      directly from the merge directive, without retrieving data from a
      branch.

    `bzr send` creates a compact data set that, when applied using bzr
    merge, has the same effect as merging from the source branch.

    By default the merge directive is self-contained and can be applied to any
    branch containing submit_branch in its ancestry without needing access to
    the source branch.

    If --no-bundle is specified, then Bazaar doesn't send the contents of the
    revisions, but only a structured request to merge from the
    public_location.  In that case the public_branch is needed and it must be
    up-to-date and accessible to the recipient.  The public_branch is always
    included if known, so that people can check it later.

    The submit branch defaults to the parent of the source branch, but can be
    overridden.  Both submit branch and public branch will be remembered in
    branch.conf the first time they are used for a particular branch.  The
    source branch defaults to that containing the working directory, but can
    be changed using --from.

    Both the submit branch and the public branch follow the usual behavior
    with respect to --remember: If there is no default location set, the first
    send will set it (use --no-remember to avoid setting it).  After that, you
    can omit the location to use the default.  To change the default, use
    --remember.  The value will only be saved if the location can be accessed.

    In order to calculate those changes, bzr must analyse the submit branch.
    Therefore it is most efficient for the submit branch to be a local mirror.
    If a public location is known for the submit_branch, that location is used
    in the merge directive.

    The default behaviour is to send the merge directive by mail, unless -o is
    given, in which case it is sent to a file.

    Mail is sent using your preferred mail program.  This should be
    transparent on Windows (it uses MAPI).  On Unix, it requires the xdg-email
    utility.  If the preferred client can't be found (or used), your editor
    will be used.
To use a specific mail program, set the mail_client configuration option. (For Thunderbird 1.5, this works around some bugs.) Supported values for specific clients are "claws", "evolution", "kmail", "mail.app" (MacOS X's Mail.app), "mutt", and "thunderbird"; generic options are "default", "editor", "emacsclient", "mapi", and "xdg-email". Plugins may also add supported clients. If mail is being sent, a to address is required. This can be supplied either on the commandline, by setting the submit_to configuration option in the branch itself or the child_submit_to configuration option in the submit branch. Two formats are currently supported: "4" uses revision bundle format 4 and merge directive format 2. It is significantly faster and smaller than older formats. It is compatible with Bazaar 0.19 and later. It is the default. "0.9" uses revision bundle format 0.9 and merge directive format 1. It is compatible with Bazaar 0.12 - 0.18. The merge directives created by bzr send may be applied using bzr merge or bzr pull by specifying a file containing a merge directive as the location. bzr send makes extensive use of public locations to map local locations into URLs that can be used by other people. See `bzr help configuration` to set them, and use `bzr info` to display them. """ encoding_type = 'exact' _see_also = ['merge', 'pull'] takes_args = ['submit_branch?', 'public_branch?'] takes_options = [ Option('no-bundle', help='Do not include a bundle in the merge directive.'), Option('no-patch', help='Do not include a preview patch in the merge' ' directive.'), Option('remember', help='Remember submit and public branch.'), Option('from', help='Branch to generate the submission from, ' 'rather than the one containing the working directory.', short_name='f', type=unicode), Option('output', short_name='o', help='Write merge directive to this file or directory; ' 'use - for stdout.', type=unicode), Option('strict', help='Refuse to send if there are uncommitted changes in' ' the working tree, --no-strict disables the check.'), Option('mail-to', help='Mail the request to this address.', type=unicode), 'revision', 'message', Option('body', help='Body for the email.', type=unicode), RegistryOption('format', help='Use the specified output format.', lazy_registry=('bzrlib.send', 'format_registry')), ] def run(self, submit_branch=None, public_branch=None, no_bundle=False, no_patch=False, revision=None, remember=None, output=None, format=None, mail_to=None, message=None, body=None, strict=None, **kwargs): from bzrlib.send import send return send(submit_branch, revision, public_branch, remember, format, no_bundle, no_patch, output, kwargs.get('from', '.'), mail_to, message, body, self.outf, strict=strict) class cmd_bundle_revisions(cmd_send): __doc__ = """Create a merge-directive for submitting changes. A merge directive provides many things needed for requesting merges: * A machine-readable description of the merge to perform * An optional patch that is a preview of the changes requested * An optional bundle of revision data, so that the changes can be applied directly from the merge directive, without retrieving data from a branch. If --no-bundle is specified, then public_branch is needed (and must be up-to-date), so that the receiver can perform the merge using the public_branch. The public_branch is always included if known, so that people can check it later. The submit branch defaults to the parent, but can be overridden. Both submit branch and public branch will be remembered if supplied. 
    If a public_branch is known for the submit_branch, that public submit
    branch is used in the merge instructions.  This means that a local mirror
    can be used as your actual submit branch, once you have set public_branch
    for that mirror.

    Two formats are currently supported: "4" uses revision bundle format 4 and
    merge directive format 2.  It is significantly faster and smaller than
    older formats.  It is compatible with Bazaar 0.19 and later.  It is the
    default.  "0.9" uses revision bundle format 0.9 and merge directive
    format 1.  It is compatible with Bazaar 0.12 - 0.18.
    """

    takes_options = [
        Option('no-bundle',
               help='Do not include a bundle in the merge directive.'),
        Option('no-patch', help='Do not include a preview patch in the merge'
               ' directive.'),
        Option('remember',
               help='Remember submit and public branch.'),
        Option('from',
               help='Branch to generate the submission from, '
               'rather than the one containing the working directory.',
               short_name='f',
               type=unicode),
        Option('output', short_name='o', help='Write directive to this file.',
               type=unicode),
        Option('strict',
               help='Refuse to bundle revisions if there are uncommitted'
               ' changes in the working tree, --no-strict disables the check.'),
        'revision',
        RegistryOption('format',
                       help='Use the specified output format.',
                       lazy_registry=('bzrlib.send', 'format_registry')),
        ]
    aliases = ['bundle']

    _see_also = ['send', 'merge']

    hidden = True

    def run(self, submit_branch=None, public_branch=None, no_bundle=False,
            no_patch=False, revision=None, remember=False, output=None,
            format=None, strict=None, **kwargs):
        if output is None:
            output = '-'
        from bzrlib.send import send
        return send(submit_branch, revision, public_branch, remember,
                    format, no_bundle, no_patch, output,
                    kwargs.get('from', '.'), None, None, None, self.outf,
                    strict=strict)


class cmd_tag(Command):
    __doc__ = """Create, remove or modify a tag naming a revision.

    Tags give human-meaningful names to revisions.  Commands that take a -r
    (--revision) option can be given -rtag:X, where X is any previously
    created tag.

    Tags are stored in the branch.  Tags are copied from one branch to another
    when you branch, push, pull or merge.

    It is an error to give a tag name that already exists unless you pass
    --force, in which case the tag is moved to point to the new revision.

    To rename a tag (change the name but keep it on the same revision), run
    ``bzr tag new-name -r tag:old-name`` and then ``bzr tag --delete
    oldname``.

    If no tag name is specified it will be determined through the
    'automatic_tag_name' hook.  This can e.g. be used to automatically tag
    upstream releases by reading configure.ac.  See ``bzr help hooks`` for
    details.
""" _see_also = ['commit', 'tags'] takes_args = ['tag_name?'] takes_options = [ Option('delete', help='Delete this tag rather than placing it.', ), custom_help('directory', help='Branch in which to place the tag.'), Option('force', help='Replace existing tags.', ), 'revision', ] def run(self, tag_name=None, delete=None, directory='.', force=None, revision=None, ): branch, relpath = Branch.open_containing(directory) self.add_cleanup(branch.lock_write().unlock) if delete: if tag_name is None: raise errors.BzrCommandError(gettext("No tag specified to delete.")) branch.tags.delete_tag(tag_name) note(gettext('Deleted tag %s.') % tag_name) else: if revision: if len(revision) != 1: raise errors.BzrCommandError(gettext( "Tags can only be placed on a single revision, " "not on a range")) revision_id = revision[0].as_revision_id(branch) else: revision_id = branch.last_revision() if tag_name is None: tag_name = branch.automatic_tag_name(revision_id) if tag_name is None: raise errors.BzrCommandError(gettext( "Please specify a tag name.")) try: existing_target = branch.tags.lookup_tag(tag_name) except errors.NoSuchTag: existing_target = None if not force and existing_target not in (None, revision_id): raise errors.TagAlreadyExists(tag_name) if existing_target == revision_id: note(gettext('Tag %s already exists for that revision.') % tag_name) else: branch.tags.set_tag(tag_name, revision_id) if existing_target is None: note(gettext('Created tag %s.') % tag_name) else: note(gettext('Updated tag %s.') % tag_name) class cmd_tags(Command): __doc__ = """List tags. This command shows a table of tag names and the revisions they reference. """ _see_also = ['tag'] takes_options = [ custom_help('directory', help='Branch whose tags should be displayed.'), RegistryOption('sort', 'Sort tags by different criteria.', title='Sorting', lazy_registry=('bzrlib.tag', 'tag_sort_methods') ), 'show-ids', 'revision', ] @display_command def run(self, directory='.', sort=None, show_ids=False, revision=None): from bzrlib.tag import tag_sort_methods branch, relpath = Branch.open_containing(directory) tags = branch.tags.get_tag_dict().items() if not tags: return self.add_cleanup(branch.lock_read().unlock) if revision: # Restrict to the specified range tags = self._tags_for_range(branch, revision) if sort is None: sort = tag_sort_methods.get() sort(branch, tags) if not show_ids: # [ (tag, revid), ... ] -> [ (tag, dotted_revno), ... ] for index, (tag, revid) in enumerate(tags): try: revno = branch.revision_id_to_dotted_revno(revid) if isinstance(revno, tuple): revno = '.'.join(map(str, revno)) except (errors.NoSuchRevision, errors.GhostRevisionsHaveNoRevno, errors.UnsupportedOperation): # Bad tag data/merges can lead to tagged revisions # which are not in this branch. Fail gracefully ... revno = '?' tags[index] = (tag, revno) self.cleanup_now() for tag, revspec in tags: self.outf.write('%-20s %s\n' % (tag, revspec)) def _tags_for_range(self, branch, revision): range_valid = True rev1, rev2 = _get_revision_range(revision, branch, self.name()) revid1, revid2 = rev1.rev_id, rev2.rev_id # _get_revision_range will always set revid2 if it's not specified. # If revid1 is None, it means we want to start from the branch # origin which is always a valid ancestor. If revid1 == revid2, the # ancestry check is useless. 
        if revid1 and revid1 != revid2:
            # FIXME: We really want to use the same graph as
            # branch.iter_merge_sorted_revisions below, but this is not
            # easily available -- vila 2011-09-23
            if branch.repository.get_graph().is_ancestor(revid2, revid1):
                # We don't want to output anything in this case...
                return []
        # only show revisions between revid1 and revid2 (inclusive)
        tagged_revids = branch.tags.get_reverse_tag_dict()
        found = []
        for r in branch.iter_merge_sorted_revisions(
                start_revision_id=revid2, stop_revision_id=revid1,
                stop_rule='include'):
            revid_tags = tagged_revids.get(r[0], None)
            if revid_tags:
                found.extend([(tag, r[0]) for tag in revid_tags])
        return found


class cmd_reconfigure(Command):
    __doc__ = """Reconfigure the type of a bzr directory.

    A target configuration must be specified.

    For checkouts, the bind-to location will be auto-detected if not specified.
    The order of preference is
    1. For a lightweight checkout, the current bound location.
    2. For branches that used to be checkouts, the previously-bound location.
    3. The push location.
    4. The parent location.
    If none of these is available, --bind-to must be specified.
    """

    _see_also = ['branches', 'checkouts', 'standalone-trees', 'working-trees']
    takes_args = ['location?']
    takes_options = [
        RegistryOption.from_kwargs(
            'tree_type',
            title='Tree type',
            help='The relation between branch and tree.',
            value_switches=True, enum_switch=False,
            branch='Reconfigure to be an unbound branch with no working tree.',
            tree='Reconfigure to be an unbound branch with a working tree.',
            checkout='Reconfigure to be a bound branch with a working tree.',
            lightweight_checkout='Reconfigure to be a lightweight'
                ' checkout (with no local history).',
            ),
        RegistryOption.from_kwargs(
            'repository_type',
            title='Repository type',
            help='Location of the repository.',
            value_switches=True, enum_switch=False,
            standalone='Reconfigure to be a standalone branch '
                '(i.e. stop using shared repository).',
            use_shared='Reconfigure to use a shared repository.',
            ),
        RegistryOption.from_kwargs(
            'repository_trees',
            title='Trees in Repository',
            help='Whether new branches in the repository have trees.',
            value_switches=True, enum_switch=False,
            with_trees='Reconfigure repository to create '
                'working trees on branches by default.',
            with_no_trees='Reconfigure repository to not create '
                'working trees on branches by default.'
            ),
        Option('bind-to', help='Branch to bind checkout to.', type=str),
        Option('force',
               help='Perform reconfiguration even if local changes'
               ' will be lost.'),
        Option('stacked-on',
               help='Reconfigure a branch to be stacked on another branch.',
               type=unicode,
               ),
        Option('unstacked',
               help='Reconfigure a branch to be unstacked. This '
                    'may require copying substantial data into it.',
               ),
        ]

    def run(self, location=None, bind_to=None, force=False,
            tree_type=None, repository_type=None, repository_trees=None,
            stacked_on=None, unstacked=None):
        directory = controldir.ControlDir.open(location)
        if stacked_on and unstacked:
            raise errors.BzrCommandError(gettext("Can't use both --stacked-on and --unstacked"))
        elif stacked_on is not None:
            reconfigure.ReconfigureStackedOn().apply(directory, stacked_on)
        elif unstacked:
            reconfigure.ReconfigureUnstacked().apply(directory)
        # At the moment you can use --stacked-on and a different
        # reconfiguration shape at the same time; there seems no good reason
        # to ban it.
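        # The remaining options are applied as up to three independent
        # conversions (tree type, repository type, repository trees), each
        # built by a reconfigure.Reconfigure factory and then applied.  A
        # minimal sketch of a single conversion through the same API (the
        # location is hypothetical):
        #
        #   d = controldir.ControlDir.open('path/to/branch')
        #   reconfigure.Reconfigure.to_tree(d).apply(False)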
if (tree_type is None and repository_type is None and repository_trees is None): if stacked_on or unstacked: return else: raise errors.BzrCommandError(gettext('No target configuration ' 'specified')) reconfiguration = None if tree_type == 'branch': reconfiguration = reconfigure.Reconfigure.to_branch(directory) elif tree_type == 'tree': reconfiguration = reconfigure.Reconfigure.to_tree(directory) elif tree_type == 'checkout': reconfiguration = reconfigure.Reconfigure.to_checkout( directory, bind_to) elif tree_type == 'lightweight-checkout': reconfiguration = reconfigure.Reconfigure.to_lightweight_checkout( directory, bind_to) if reconfiguration: reconfiguration.apply(force) reconfiguration = None if repository_type == 'use-shared': reconfiguration = reconfigure.Reconfigure.to_use_shared(directory) elif repository_type == 'standalone': reconfiguration = reconfigure.Reconfigure.to_standalone(directory) if reconfiguration: reconfiguration.apply(force) reconfiguration = None if repository_trees == 'with-trees': reconfiguration = reconfigure.Reconfigure.set_repository_trees( directory, True) elif repository_trees == 'with-no-trees': reconfiguration = reconfigure.Reconfigure.set_repository_trees( directory, False) if reconfiguration: reconfiguration.apply(force) reconfiguration = None class cmd_switch(Command): __doc__ = """Set the branch of a checkout and update. For lightweight checkouts, this changes the branch being referenced. For heavyweight checkouts, this checks that there are no local commits versus the current bound branch, then it makes the local branch a mirror of the new location and binds to it. In both cases, the working tree is updated and uncommitted changes are merged. The user can commit or revert these as they desire. Pending merges need to be committed or reverted before using switch. The path to the branch to switch to can be specified relative to the parent directory of the current branch. For example, if you are currently in a checkout of /path/to/branch, specifying 'newbranch' will find a branch at /path/to/newbranch. Bound branches use the nickname of its master branch unless it is set locally, in which case switching will update the local nickname to be that of the master. 
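# Illustrative sketch (not part of bzrlib): converting a standalone branch
# into a bound checkout with the same Reconfigure helpers used above.  The
# location and bind-to URL are hypothetical.
def _example_make_bound_checkout(location, bind_to_url, force=False):
    from bzrlib import controldir, reconfigure
    bzr_dir = controldir.ControlDir.open(location)
    # Equivalent to ``bzr reconfigure --checkout --bind-to <url>``; pass
    # force=True to proceed even if local changes would be lost.
    reconfiguration = reconfigure.Reconfigure.to_checkout(bzr_dir, bind_to_url)
    reconfiguration.apply(force)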
""" takes_args = ['to_location?'] takes_options = ['directory', Option('force', help='Switch even if local commits will be lost.'), 'revision', Option('create-branch', short_name='b', help='Create the target branch from this one before' ' switching to it.'), Option('store', help='Store and restore uncommitted changes in the' ' branch.'), ] def run(self, to_location=None, force=False, create_branch=False, revision=None, directory=u'.', store=False): from bzrlib import switch tree_location = directory revision = _get_one_revision('switch', revision) possible_transports = [] control_dir = controldir.ControlDir.open_containing(tree_location, possible_transports=possible_transports)[0] if to_location is None: if revision is None: raise errors.BzrCommandError(gettext('You must supply either a' ' revision or a location')) to_location = tree_location try: branch = control_dir.open_branch( possible_transports=possible_transports) had_explicit_nick = branch.get_config().has_explicit_nickname() except errors.NotBranchError: branch = None had_explicit_nick = False if create_branch: if branch is None: raise errors.BzrCommandError( gettext('cannot create branch without source branch')) to_location = lookup_new_sibling_branch(control_dir, to_location, possible_transports=possible_transports) to_branch = branch.bzrdir.sprout(to_location, possible_transports=possible_transports, source_branch=branch).open_branch() else: try: to_branch = Branch.open(to_location, possible_transports=possible_transports) except errors.NotBranchError: to_branch = open_sibling_branch(control_dir, to_location, possible_transports=possible_transports) if revision is not None: revision = revision.as_revision_id(to_branch) switch.switch(control_dir, to_branch, force, revision_id=revision, store_uncommitted=store) if had_explicit_nick: branch = control_dir.open_branch() #get the new branch! branch.nick = to_branch.nick note(gettext('Switched to branch: %s'), urlutils.unescape_for_display(to_branch.base, 'utf-8')) class cmd_view(Command): __doc__ = """Manage filtered views. Views provide a mask over the tree so that users can focus on a subset of a tree when doing their work. After creating a view, commands that support a list of files - status, diff, commit, etc - effectively have that list of files implicitly given each time. An explicit list of files can still be given but those files must be within the current view. In most cases, a view has a short life-span: it is created to make a selected change and is deleted once that change is committed. At other times, you may wish to create one or more named views and switch between them. To disable the current view without deleting it, you can switch to the pseudo view called ``off``. This can be useful when you need to see the whole tree for an operation or two (e.g. merge) but want to switch back to your view after that. :Examples: To define the current view:: bzr view file1 dir1 ... To list the current view:: bzr view To delete the current view:: bzr view --delete To disable the current view without deleting it:: bzr view --switch off To define a named view and switch to it:: bzr view --name view-name file1 dir1 ... 
To list a named view:: bzr view --name view-name To delete a named view:: bzr view --name view-name --delete To switch to a named view:: bzr view --switch view-name To list all views defined:: bzr view --all To delete all views:: bzr view --delete --all """ _see_also = [] takes_args = ['file*'] takes_options = [ Option('all', help='Apply list or delete action to all views.', ), Option('delete', help='Delete the view.', ), Option('name', help='Name of the view to define, list or delete.', type=unicode, ), Option('switch', help='Name of the view to switch to.', type=unicode, ), ] def run(self, file_list, all=False, delete=False, name=None, switch=None, ): tree, file_list = WorkingTree.open_containing_paths(file_list, apply_view=False) current_view, view_dict = tree.views.get_view_info() if name is None: name = current_view if delete: if file_list: raise errors.BzrCommandError(gettext( "Both --delete and a file list specified")) elif switch: raise errors.BzrCommandError(gettext( "Both --delete and --switch specified")) elif all: tree.views.set_view_info(None, {}) self.outf.write(gettext("Deleted all views.\n")) elif name is None: raise errors.BzrCommandError(gettext("No current view to delete")) else: tree.views.delete_view(name) self.outf.write(gettext("Deleted '%s' view.\n") % name) elif switch: if file_list: raise errors.BzrCommandError(gettext( "Both --switch and a file list specified")) elif all: raise errors.BzrCommandError(gettext( "Both --switch and --all specified")) elif switch == 'off': if current_view is None: raise errors.BzrCommandError(gettext("No current view to disable")) tree.views.set_view_info(None, view_dict) self.outf.write(gettext("Disabled '%s' view.\n") % (current_view)) else: tree.views.set_view_info(switch, view_dict) view_str = views.view_display_str(tree.views.lookup_view()) self.outf.write(gettext("Using '{0}' view: {1}\n").format(switch, view_str)) elif all: if view_dict: self.outf.write(gettext('Views defined:\n')) for view in sorted(view_dict): if view == current_view: active = "=>" else: active = " " view_str = views.view_display_str(view_dict[view]) self.outf.write('%s %-20s %s\n' % (active, view, view_str)) else: self.outf.write(gettext('No views defined.\n')) elif file_list: if name is None: # No name given and no current view set name = 'my' elif name == 'off': raise errors.BzrCommandError(gettext( "Cannot change the 'off' pseudo view")) tree.views.set_view(name, sorted(file_list)) view_str = views.view_display_str(tree.views.lookup_view()) self.outf.write(gettext("Using '{0}' view: {1}\n").format(name, view_str)) else: # list the files if name is None: # No name given and no current view set self.outf.write(gettext('No current view.\n')) else: view_str = views.view_display_str(tree.views.lookup_view(name)) self.outf.write(gettext("'{0}' view is: {1}\n").format(name, view_str)) class cmd_hooks(Command): __doc__ = """Show hooks.""" hidden = True def run(self): for hook_key in sorted(hooks.known_hooks.keys()): some_hooks = hooks.known_hooks_key_to_object(hook_key) self.outf.write("%s:\n" % type(some_hooks).__name__) for hook_name, hook_point in sorted(some_hooks.items()): self.outf.write(" %s:\n" % (hook_name,)) found_hooks = list(hook_point) if found_hooks: for hook in found_hooks: self.outf.write(" %s\n" % (some_hooks.get_hook_name(hook),)) else: self.outf.write(gettext(" \n")) class cmd_remove_branch(Command): __doc__ = """Remove a branch. This will remove the branch from the specified location but will keep any working tree or repository in place. 
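# Illustrative sketch (not part of bzrlib): defining and reading a filtered
# view through the WorkingTree.views API used by ``cmd_view`` above.  The
# view name and file list are hypothetical.
def _example_define_view(view_name, file_list):
    from bzrlib import views
    from bzrlib.workingtree import WorkingTree
    # Locate the containing working tree and translate the given paths to be
    # relative to it, ignoring any view currently in force.
    tree, files = WorkingTree.open_containing_paths(
        list(file_list), apply_view=False)
    tree.views.set_view(view_name, sorted(files))
    return views.view_display_str(tree.views.lookup_view(view_name))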
:Examples: Remove the branch at repo/trunk:: bzr remove-branch repo/trunk """ takes_args = ["location?"] takes_options = ['directory', Option('force', help='Remove branch even if it is the active branch.')] aliases = ["rmbranch"] def run(self, directory=None, location=None, force=False): br = open_nearby_branch(near=directory, location=location) if not force and br.bzrdir.has_workingtree(): try: active_branch = br.bzrdir.open_branch(name="") except errors.NotBranchError: active_branch = None if (active_branch is not None and br.control_url == active_branch.control_url): raise errors.BzrCommandError( gettext("Branch is active. Use --force to remove it.")) br.bzrdir.destroy_branch(br.name) class cmd_shelve(Command): __doc__ = """Temporarily set aside some changes from the current tree. Shelve allows you to temporarily put changes you've made "on the shelf", ie. out of the way, until a later time when you can bring them back from the shelf with the 'unshelve' command. The changes are stored alongside your working tree, and so they aren't propagated along with your branch nor will they survive its deletion. If shelve --list is specified, previously-shelved changes are listed. Shelve is intended to help separate several sets of changes that have been inappropriately mingled. If you just want to get rid of all changes and you don't need to restore them later, use revert. If you want to shelve all text changes at once, use shelve --all. If filenames are specified, only the changes to those files will be shelved. Other files will be left untouched. If a revision is specified, changes since that revision will be shelved. You can put multiple items on the shelf, and by default, 'unshelve' will restore the most recently shelved changes. For complicated changes, it is possible to edit the changes in a separate editor program to decide what the file remaining in the working copy should look like. To do this, add the configuration option change_editor = PROGRAM @new_path @old_path where @new_path is replaced with the path of the new version of the file and @old_path is replaced with the path of the old version of the file. The PROGRAM should save the new file with the desired contents of the file in the working tree. """ takes_args = ['file*'] takes_options = [ 'directory', 'revision', Option('all', help='Shelve all changes.'), 'message', RegistryOption('writer', 'Method to use for writing diffs.', bzrlib.option.diff_writer_registry, value_switches=True, enum_switch=False), Option('list', help='List shelved changes.'), Option('destroy', help='Destroy removed changes instead of shelving them.'), ] _see_also = ['unshelve', 'configuration'] def run(self, revision=None, all=False, file_list=None, message=None, writer=None, list=False, destroy=False, directory=None): if list: return self.run_for_list(directory=directory) from bzrlib.shelf_ui import Shelver if writer is None: writer = bzrlib.option.diff_writer_registry.get() try: shelver = Shelver.from_args(writer(sys.stdout), revision, all, file_list, message, destroy=destroy, directory=directory) try: shelver.run() finally: shelver.finalize() except errors.UserAbort: return 0 def run_for_list(self, directory=None): if directory is None: directory = u'.' 
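# Illustrative sketch (not part of bzrlib): shelving every pending change
# without prompting, roughly what ``bzr shelve --all`` does above.  The
# directory and message are hypothetical.
def _example_shelve_all_changes(directory=u'.', message=None):
    import sys
    import bzrlib.option
    from bzrlib.shelf_ui import Shelver
    # Use the default diff writer, as the command does when --writer is not
    # given; True in the third positional argument corresponds to --all.
    writer = bzrlib.option.diff_writer_registry.get()
    shelver = Shelver.from_args(writer(sys.stdout), None, True, None,
                                message, directory=directory)
    try:
        shelver.run()
    finally:
        shelver.finalize()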
tree = WorkingTree.open_containing(directory)[0] self.add_cleanup(tree.lock_read().unlock) manager = tree.get_shelf_manager() shelves = manager.active_shelves() if len(shelves) == 0: note(gettext('No shelved changes.')) return 0 for shelf_id in reversed(shelves): message = manager.get_metadata(shelf_id).get('message') if message is None: message = '' self.outf.write('%3d: %s\n' % (shelf_id, message)) return 1 class cmd_unshelve(Command): __doc__ = """Restore shelved changes. By default, the most recently shelved changes are restored. However if you specify a shelf by id those changes will be restored instead. This works best when the changes don't depend on each other. """ takes_args = ['shelf_id?'] takes_options = [ 'directory', RegistryOption.from_kwargs( 'action', help="The action to perform.", enum_switch=False, value_switches=True, apply="Apply changes and remove from the shelf.", dry_run="Show changes, but do not apply or remove them.", preview="Instead of unshelving the changes, show the diff that " "would result from unshelving.", delete_only="Delete changes without applying them.", keep="Apply changes but don't delete them.", ) ] _see_also = ['shelve'] def run(self, shelf_id=None, action='apply', directory=u'.'): from bzrlib.shelf_ui import Unshelver unshelver = Unshelver.from_args(shelf_id, action, directory=directory) try: unshelver.run() finally: unshelver.tree.unlock() class cmd_clean_tree(Command): __doc__ = """Remove unwanted files from working tree. By default, only unknown files, not ignored files, are deleted. Versioned files are never deleted. Another class is 'detritus', which includes files emitted by bzr during normal operations and selftests. (The value of these files decreases with time.) If no options are specified, unknown files are deleted. Otherwise, option flags are respected, and may be combined. To check what clean-tree will do, use --dry-run. """ takes_options = ['directory', Option('ignored', help='Delete all ignored files.'), Option('detritus', help='Delete conflict files, merge and revert' ' backups, and failed selftest dirs.'), Option('unknown', help='Delete files unknown to bzr (default).'), Option('dry-run', help='Show files to delete instead of' ' deleting them.'), Option('force', help='Do not prompt before deleting.')] def run(self, unknown=False, ignored=False, detritus=False, dry_run=False, force=False, directory=u'.'): from bzrlib.clean_tree import clean_tree if not (unknown or ignored or detritus): unknown = True if dry_run: force = True clean_tree(directory, unknown=unknown, ignored=ignored, detritus=detritus, dry_run=dry_run, no_prompt=force) class cmd_reference(Command): __doc__ = """list, view and set branch locations for nested trees. If no arguments are provided, lists the branch locations for nested trees. If one argument is provided, display the branch location for that tree. If two arguments are provided, set the branch location for that tree. """ hidden = True takes_args = ['path?', 'location?'] def run(self, path=None, location=None): branchdir = '.' 
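# Illustrative sketch (not part of bzrlib): previewing what ``bzr clean-tree``
# would delete, using the same helper as ``cmd_clean_tree`` above.  The
# directory is hypothetical.
def _example_preview_clean_tree(directory=u'.'):
    from bzrlib.clean_tree import clean_tree
    # With dry_run=True nothing is removed; the files that would be deleted
    # are reported instead, matching ``bzr clean-tree --dry-run``.
    clean_tree(directory, unknown=True, ignored=False, detritus=False,
               dry_run=True, no_prompt=True)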
if path is not None: branchdir = path tree, branch, relpath =( controldir.ControlDir.open_containing_tree_or_branch(branchdir)) if path is not None: path = relpath if tree is None: tree = branch.basis_tree() if path is None: info = branch._get_all_reference_info().iteritems() self._display_reference_info(tree, branch, info) else: file_id = tree.path2id(path) if file_id is None: raise errors.NotVersionedError(path) if location is None: info = [(file_id, branch.get_reference_info(file_id))] self._display_reference_info(tree, branch, info) else: branch.set_reference_info(file_id, path, location) def _display_reference_info(self, tree, branch, info): ref_list = [] for file_id, (path, location) in info: try: path = tree.id2path(file_id) except errors.NoSuchId: pass ref_list.append((path, location)) for path, location in sorted(ref_list): self.outf.write('%s %s\n' % (path, location)) class cmd_export_pot(Command): __doc__ = """Export command helps and error messages in po format.""" hidden = True takes_options = [Option('plugin', help='Export help text from named command '\ '(defaults to all built in commands).', type=str), Option('include-duplicates', help='Output multiple copies of the same msgid ' 'string if it appears more than once.'), ] def run(self, plugin=None, include_duplicates=False): from bzrlib.export_pot import export_pot export_pot(self.outf, plugin, include_duplicates) def _register_lazy_builtins(): # register lazy builtins from other modules; called at startup and should # be only called once. for (name, aliases, module_name) in [ ('cmd_bundle_info', [], 'bzrlib.bundle.commands'), ('cmd_config', [], 'bzrlib.config'), ('cmd_dpush', [], 'bzrlib.foreign'), ('cmd_version_info', [], 'bzrlib.cmd_version_info'), ('cmd_resolve', ['resolved'], 'bzrlib.conflicts'), ('cmd_conflicts', [], 'bzrlib.conflicts'), ('cmd_ping', [], 'bzrlib.smart.ping'), ('cmd_sign_my_commits', [], 'bzrlib.commit_signature_commands'), ('cmd_verify_signatures', [], 'bzrlib.commit_signature_commands'), ('cmd_test_script', [], 'bzrlib.cmd_test_script'), ]: builtin_command_registry.register_lazy(name, aliases, module_name) bzr-2.7.0/bzrlib/bundle/0000755000000000000000000000000010437061064013201 5ustar 00000000000000bzr-2.7.0/bzrlib/bzr_distutils.py0000644000000000000000000001016511673635356015224 0ustar 00000000000000# -*- coding: utf-8 -*- # # Copyright (C) 2007,2009,2011 Canonical Ltd. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # This code is from bzr-explorer and modified for bzr. 
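# Illustrative sketch (not part of this module): how a distutils setup script
# might register the build_mo command defined below so that
# ``python setup.py build_mo`` compiles the po files.  The project name and
# version are hypothetical.
def _example_setup_py():
    from distutils.core import setup
    from bzrlib.bzr_distutils import build_mo
    setup(name='example-project',
          version='1.0',
          cmdclass={'build_mo': build_mo})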
"""build_mo command for setup.py""" from __future__ import absolute_import from distutils import log from distutils.core import Command from distutils.dep_util import newer from distutils.spawn import find_executable import os import re class build_mo(Command): """Subcommand of build command: build_mo""" description = 'compile po files to mo files' # List of options: # - long name, # - short name (None if no short name), # - help string. user_options = [('build-dir=', 'd', 'Directory to build locale files'), ('output-base=', 'o', 'mo-files base name'), ('source-dir=', None, 'Directory with sources po files'), ('force', 'f', 'Force creation of mo files'), ('lang=', None, 'Comma-separated list of languages ' 'to process'), ] boolean_options = ['force'] def initialize_options(self): self.build_dir = None self.output_base = None self.source_dir = None self.force = None self.lang = None def finalize_options(self): self.set_undefined_options('build', ('force', 'force')) self.prj_name = self.distribution.get_name() if self.build_dir is None: self.build_dir = 'bzrlib/locale' if not self.output_base: self.output_base = self.prj_name or 'messages' if self.source_dir is None: self.source_dir = 'po' if self.lang is None: re_po = re.compile(r'^([a-zA-Z_]+)\.po$') self.lang = [] for i in os.listdir(self.source_dir): mo = re_po.match(i) if mo: self.lang.append(mo.group(1)) else: self.lang = [i.strip() for i in self.lang.split(',') if i.strip()] def run(self): """Run msgfmt for each language""" if not self.lang: return if find_executable('msgfmt') is None: log.warn("GNU gettext msgfmt utility not found!") log.warn("Skip compiling po files.") return if 'en' in self.lang: if find_executable('msginit') is None: log.warn("GNU gettext msginit utility not found!") log.warn("Skip creating English PO file.") else: log.info('Creating English PO file...') pot = (self.prj_name or 'messages') + '.pot' en_po = 'en.po' self.spawn(['msginit', '--no-translator', '-l', 'en', '-i', os.path.join(self.source_dir, pot), '-o', os.path.join(self.source_dir, en_po), ]) basename = self.output_base if not basename.endswith('.mo'): basename += '.mo' for lang in self.lang: po = os.path.join('po', lang + '.po') if not os.path.isfile(po): po = os.path.join('po', lang + '.po') dir_ = os.path.join(self.build_dir, lang, 'LC_MESSAGES') self.mkpath(dir_) mo = os.path.join(dir_, basename) if self.force or newer(po, mo): log.info('Compile: %s -> %s' % (po, mo)) self.spawn(['msgfmt', '-o', mo, po]) bzr-2.7.0/bzrlib/bzrdir.py0000644000000000000000000027361212147364641013620 0ustar 00000000000000# Copyright (C) 2006-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """BzrDir logic. The BzrDir is the basic control directory used by bzr. At format 7 this was split out into Branch, Repository and Checkout control directories. 
Note: This module has a lot of ``open`` functions/methods that return references to in-memory objects. As a rule, there are no matching ``close`` methods. To free any associated resources, simply stop referencing the objects returned. """ from __future__ import absolute_import import sys from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ import bzrlib from bzrlib import ( branch as _mod_branch, cleanup, errors, fetch, graph, lockable_files, lockdir, osutils, pyutils, remote, repository, revision as _mod_revision, transport as _mod_transport, ui, urlutils, vf_search, win32utils, workingtree_3, workingtree_4, ) from bzrlib.branchfmt import fullhistory as fullhistorybranch from bzrlib.repofmt import knitpack_repo from bzrlib.transport import ( do_catching_redirections, local, ) from bzrlib.i18n import gettext """) from bzrlib.trace import ( mutter, note, ) from bzrlib import ( config, controldir, registry, ) from bzrlib.symbol_versioning import ( deprecated_in, deprecated_method, ) class BzrDir(controldir.ControlDir): """A .bzr control diretory. BzrDir instances let you create or open any of the things that can be found within .bzr - checkouts, branches and repositories. :ivar transport: the transport which this bzr dir is rooted at (i.e. file:///.../.bzr/) :ivar root_transport: a transport connected to the directory this bzr was opened from (i.e. the parent directory holding the .bzr directory). Everything in the bzrdir should have the same file permissions. :cvar hooks: An instance of BzrDirHooks. """ def break_lock(self): """Invoke break_lock on the first object in the bzrdir. If there is a tree, the tree is opened and break_lock() called. Otherwise, branch is tried, and finally repository. """ # XXX: This seems more like a UI function than something that really # belongs in this class. try: thing_to_unlock = self.open_workingtree() except (errors.NotLocalUrl, errors.NoWorkingTree): try: thing_to_unlock = self.open_branch() except errors.NotBranchError: try: thing_to_unlock = self.open_repository() except errors.NoRepositoryPresent: return thing_to_unlock.break_lock() def check_conversion_target(self, target_format): """Check that a bzrdir as a whole can be converted to a new format.""" # The only current restriction is that the repository content can be # fetched compatibly with the target. target_repo_format = target_format.repository_format try: self.open_repository()._format.check_conversion_target( target_repo_format) except errors.NoRepositoryPresent: # No repo, no problem. pass def clone_on_transport(self, transport, revision_id=None, force_new_repo=False, preserve_stacking=False, stacked_on=None, create_prefix=False, use_existing_dir=True, no_tree=False): """Clone this bzrdir and its contents to transport verbatim. :param transport: The transport for the location to produce the clone at. If the target directory does not exist, it will be created. :param revision_id: The tip revision-id to use for any branch or working tree. If not None, then the clone operation may tune itself to download less data. :param force_new_repo: Do not use a shared repository for the target, even if one is available. :param preserve_stacking: When cloning a stacked branch, stack the new branch on top of the other branch's stacked-on branch. :param create_prefix: Create any missing directories leading up to to_transport. :param use_existing_dir: Use an existing directory if one exists. :param no_tree: If set to true prevents creation of a working tree. 
""" # Overview: put together a broad description of what we want to end up # with; then make as few api calls as possible to do it. # We may want to create a repo/branch/tree, if we do so what format # would we want for each: require_stacking = (stacked_on is not None) format = self.cloning_metadir(require_stacking) # Figure out what objects we want: try: local_repo = self.find_repository() except errors.NoRepositoryPresent: local_repo = None try: local_branch = self.open_branch() except errors.NotBranchError: local_branch = None else: # enable fallbacks when branch is not a branch reference if local_branch.repository.has_same_location(local_repo): local_repo = local_branch.repository if preserve_stacking: try: stacked_on = local_branch.get_stacked_on_url() except (errors.UnstackableBranchFormat, errors.UnstackableRepositoryFormat, errors.NotStacked): pass # Bug: We create a metadir without knowing if it can support stacking, # we should look up the policy needs first, or just use it as a hint, # or something. if local_repo: make_working_trees = local_repo.make_working_trees() and not no_tree want_shared = local_repo.is_shared() repo_format_name = format.repository_format.network_name() else: make_working_trees = False want_shared = False repo_format_name = None result_repo, result, require_stacking, repository_policy = \ format.initialize_on_transport_ex(transport, use_existing_dir=use_existing_dir, create_prefix=create_prefix, force_new_repo=force_new_repo, stacked_on=stacked_on, stack_on_pwd=self.root_transport.base, repo_format_name=repo_format_name, make_working_trees=make_working_trees, shared_repo=want_shared) if repo_format_name: try: # If the result repository is in the same place as the # resulting bzr dir, it will have no content, further if the # result is not stacked then we know all content should be # copied, and finally if we are copying up to a specific # revision_id then we can use the pending-ancestry-result which # does not require traversing all of history to describe it. if (result_repo.user_url == result.user_url and not require_stacking and revision_id is not None): fetch_spec = vf_search.PendingAncestryResult( [revision_id], local_repo) result_repo.fetch(local_repo, fetch_spec=fetch_spec) else: result_repo.fetch(local_repo, revision_id=revision_id) finally: result_repo.unlock() else: if result_repo is not None: raise AssertionError('result_repo not None(%r)' % result_repo) # 1 if there is a branch present # make sure its content is available in the target repository # clone it. if local_branch is not None: result_branch = local_branch.clone(result, revision_id=revision_id, repository_policy=repository_policy) try: # Cheaper to check if the target is not local, than to try making # the tree and fail. result.root_transport.local_abspath('.') if result_repo is None or result_repo.make_working_trees(): self.open_workingtree().clone(result, revision_id=revision_id) except (errors.NoWorkingTree, errors.NotLocalUrl): pass return result # TODO: This should be given a Transport, and should chdir up; otherwise # this will open a new connection. def _make_tail(self, url): t = _mod_transport.get_transport(url) t.ensure_base() def determine_repository_policy(self, force_new_repo=False, stack_on=None, stack_on_pwd=None, require_stacking=False): """Return an object representing a policy to use. This controls whether a new repository is created, and the format of that repository, or some existing shared repository used instead. 
If stack_on is supplied, will not seek a containing shared repo. :param force_new_repo: If True, require a new repository to be created. :param stack_on: If supplied, the location to stack on. If not supplied, a default_stack_on location may be used. :param stack_on_pwd: If stack_on is relative, the location it is relative to. """ def repository_policy(found_bzrdir): stack_on = None stack_on_pwd = None config = found_bzrdir.get_config() stop = False stack_on = config.get_default_stack_on() if stack_on is not None: stack_on_pwd = found_bzrdir.user_url stop = True # does it have a repository ? try: repository = found_bzrdir.open_repository() except errors.NoRepositoryPresent: repository = None else: if (found_bzrdir.user_url != self.user_url and not repository.is_shared()): # Don't look higher, can't use a higher shared repo. repository = None stop = True else: stop = True if not stop: return None, False if repository: return UseExistingRepository(repository, stack_on, stack_on_pwd, require_stacking=require_stacking), True else: return CreateRepository(self, stack_on, stack_on_pwd, require_stacking=require_stacking), True if not force_new_repo: if stack_on is None: policy = self._find_containing(repository_policy) if policy is not None: return policy else: try: return UseExistingRepository(self.open_repository(), stack_on, stack_on_pwd, require_stacking=require_stacking) except errors.NoRepositoryPresent: pass return CreateRepository(self, stack_on, stack_on_pwd, require_stacking=require_stacking) def _find_or_create_repository(self, force_new_repo): """Create a new repository if needed, returning the repository.""" policy = self.determine_repository_policy(force_new_repo) return policy.acquire_repository()[0] def _find_source_repo(self, add_cleanup, source_branch): """Find the source branch and repo for a sprout operation. This is helper intended for use by _sprout. :returns: (source_branch, source_repository). Either or both may be None. If not None, they will be read-locked (and their unlock(s) scheduled via the add_cleanup param). """ if source_branch is not None: add_cleanup(source_branch.lock_read().unlock) return source_branch, source_branch.repository try: source_branch = self.open_branch() source_repository = source_branch.repository except errors.NotBranchError: source_branch = None try: source_repository = self.open_repository() except errors.NoRepositoryPresent: source_repository = None else: add_cleanup(source_repository.lock_read().unlock) else: add_cleanup(source_branch.lock_read().unlock) return source_branch, source_repository def sprout(self, url, revision_id=None, force_new_repo=False, recurse='down', possible_transports=None, accelerator_tree=None, hardlink=False, stacked=False, source_branch=None, create_tree_if_local=True): """Create a copy of this controldir prepared for use as a new line of development. If url's last component does not exist, it will be created. Attributes related to the identity of the source branch like branch nickname will be cleaned, a working tree is created whether one existed before or not; and a local branch is always created. if revision_id is not None, then the clone operation may tune itself to download less data. :param accelerator_tree: A tree which can be used for retrieving file contents more quickly than the revision tree, i.e. a workingtree. The revision tree will be used for cases where accelerator_tree's content is different. :param hardlink: If true, hard-link files from accelerator_tree, where possible. 
:param stacked: If true, create a stacked branch referring to the location of this control directory. :param create_tree_if_local: If true, a working-tree will be created when working locally. :return: The created control directory """ operation = cleanup.OperationWithCleanups(self._sprout) return operation.run(url, revision_id=revision_id, force_new_repo=force_new_repo, recurse=recurse, possible_transports=possible_transports, accelerator_tree=accelerator_tree, hardlink=hardlink, stacked=stacked, source_branch=source_branch, create_tree_if_local=create_tree_if_local) def _sprout(self, op, url, revision_id=None, force_new_repo=False, recurse='down', possible_transports=None, accelerator_tree=None, hardlink=False, stacked=False, source_branch=None, create_tree_if_local=True): add_cleanup = op.add_cleanup fetch_spec_factory = fetch.FetchSpecFactory() if revision_id is not None: fetch_spec_factory.add_revision_ids([revision_id]) fetch_spec_factory.source_branch_stop_revision_id = revision_id if possible_transports is None: possible_transports = [] else: possible_transports = list(possible_transports) + [ self.root_transport] target_transport = _mod_transport.get_transport(url, possible_transports) target_transport.ensure_base() cloning_format = self.cloning_metadir(stacked) # Create/update the result branch try: result = controldir.ControlDir.open_from_transport(target_transport) except errors.NotBranchError: result = cloning_format.initialize_on_transport(target_transport) source_branch, source_repository = self._find_source_repo( add_cleanup, source_branch) fetch_spec_factory.source_branch = source_branch # if a stacked branch wasn't requested, we don't create one # even if the origin was stacked if stacked and source_branch is not None: stacked_branch_url = self.root_transport.base else: stacked_branch_url = None repository_policy = result.determine_repository_policy( force_new_repo, stacked_branch_url, require_stacking=stacked) result_repo, is_new_repo = repository_policy.acquire_repository( possible_transports=possible_transports) add_cleanup(result_repo.lock_write().unlock) fetch_spec_factory.source_repo = source_repository fetch_spec_factory.target_repo = result_repo if stacked or (len(result_repo._fallback_repositories) != 0): target_repo_kind = fetch.TargetRepoKinds.STACKED elif is_new_repo: target_repo_kind = fetch.TargetRepoKinds.EMPTY else: target_repo_kind = fetch.TargetRepoKinds.PREEXISTING fetch_spec_factory.target_repo_kind = target_repo_kind if source_repository is not None: fetch_spec = fetch_spec_factory.make_fetch_spec() result_repo.fetch(source_repository, fetch_spec=fetch_spec) if source_branch is None: # this is for sprouting a controldir without a branch; is that # actually useful? # Not especially, but it's part of the contract. 
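# Illustrative sketch (not part of bzrlib): sprouting a new line of
# development from an existing control directory, as described in the
# ``sprout`` docstring above.  Both URLs are hypothetical.
def _example_sprout_branch(source_url, target_url):
    from bzrlib import controldir
    source = controldir.ControlDir.open(source_url)
    # Creates target_url if needed and, when the target is local, a working
    # tree as well; returns the new control directory.
    return source.sprout(target_url)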
result_branch = result.create_branch() else: result_branch = source_branch.sprout(result, revision_id=revision_id, repository_policy=repository_policy, repository=result_repo) mutter("created new branch %r" % (result_branch,)) # Create/update the result working tree if (create_tree_if_local and not result.has_workingtree() and isinstance(target_transport, local.LocalTransport) and (result_repo is None or result_repo.make_working_trees())): wt = result.create_workingtree(accelerator_tree=accelerator_tree, hardlink=hardlink, from_branch=result_branch) wt.lock_write() try: if wt.path2id('') is None: try: wt.set_root_id(self.open_workingtree.get_root_id()) except errors.NoWorkingTree: pass finally: wt.unlock() else: wt = None if recurse == 'down': basis = None if wt is not None: basis = wt.basis_tree() elif result_branch is not None: basis = result_branch.basis_tree() elif source_branch is not None: basis = source_branch.basis_tree() if basis is not None: add_cleanup(basis.lock_read().unlock) subtrees = basis.iter_references() else: subtrees = [] for path, file_id in subtrees: target = urlutils.join(url, urlutils.escape(path)) sublocation = source_branch.reference_parent(file_id, path) sublocation.bzrdir.sprout(target, basis.get_reference_revision(file_id, path), force_new_repo=force_new_repo, recurse=recurse, stacked=stacked) return result def _available_backup_name(self, base): """Find a non-existing backup file name based on base. See bzrlib.osutils.available_backup_name about race conditions. """ return osutils.available_backup_name(base, self.root_transport.has) def backup_bzrdir(self): """Backup this bzr control directory. :return: Tuple with old path name and new path name """ pb = ui.ui_factory.nested_progress_bar() try: old_path = self.root_transport.abspath('.bzr') backup_dir = self._available_backup_name('backup.bzr') new_path = self.root_transport.abspath(backup_dir) ui.ui_factory.note(gettext('making backup of {0}\n to {1}').format( urlutils.unescape_for_display(old_path, 'utf-8'), urlutils.unescape_for_display(new_path, 'utf-8'))) self.root_transport.copy_tree('.bzr', backup_dir) return (old_path, new_path) finally: pb.finished() def retire_bzrdir(self, limit=10000): """Permanently disable the bzrdir. This is done by renaming it to give the user some ability to recover if there was a problem. This will have horrible consequences if anyone has anything locked or in use. :param limit: number of times to retry """ i = 0 while True: try: to_path = '.bzr.retired.%d' % i self.root_transport.rename('.bzr', to_path) note(gettext("renamed {0} to {1}").format( self.root_transport.abspath('.bzr'), to_path)) return except (errors.TransportError, IOError, errors.PathError): i += 1 if i > limit: raise else: pass def _find_containing(self, evaluate): """Find something in a containing control directory. This method will scan containing control dirs, until it finds what it is looking for, decides that it will never find it, or runs out of containing control directories to check. It is used to implement find_repository and determine_repository_policy. :param evaluate: A function returning (value, stop). If stop is True, the value will be returned. 
""" found_bzrdir = self while True: result, stop = evaluate(found_bzrdir) if stop: return result next_transport = found_bzrdir.root_transport.clone('..') if (found_bzrdir.user_url == next_transport.base): # top of the file system return None # find the next containing bzrdir try: found_bzrdir = self.open_containing_from_transport( next_transport)[0] except errors.NotBranchError: return None def find_repository(self): """Find the repository that should be used. This does not require a branch as we use it to find the repo for new branches as well as to hook existing branches up to their repository. """ def usable_repository(found_bzrdir): # does it have a repository ? try: repository = found_bzrdir.open_repository() except errors.NoRepositoryPresent: return None, False if found_bzrdir.user_url == self.user_url: return repository, True elif repository.is_shared(): return repository, True else: return None, True found_repo = self._find_containing(usable_repository) if found_repo is None: raise errors.NoRepositoryPresent(self) return found_repo def _find_creation_modes(self): """Determine the appropriate modes for files and directories. They're always set to be consistent with the base directory, assuming that this transport allows setting modes. """ # TODO: Do we need or want an option (maybe a config setting) to turn # this off or override it for particular locations? -- mbp 20080512 if self._mode_check_done: return self._mode_check_done = True try: st = self.transport.stat('.') except errors.TransportNotPossible: self._dir_mode = None self._file_mode = None else: # Check the directory mode, but also make sure the created # directories and files are read-write for this user. This is # mostly a workaround for filesystems which lie about being able to # write to a directory (cygwin & win32) if (st.st_mode & 07777 == 00000): # FTP allows stat but does not return dir/file modes self._dir_mode = None self._file_mode = None else: self._dir_mode = (st.st_mode & 07777) | 00700 # Remove the sticky and execute bits for files self._file_mode = self._dir_mode & ~07111 def _get_file_mode(self): """Return Unix mode for newly created files, or None. """ if not self._mode_check_done: self._find_creation_modes() return self._file_mode def _get_dir_mode(self): """Return Unix mode for newly created directories, or None. """ if not self._mode_check_done: self._find_creation_modes() return self._dir_mode def get_config(self): """Get configuration for this BzrDir.""" return config.BzrDirConfig(self) def _get_config(self): """By default, no configuration is available.""" return None def __init__(self, _transport, _format): """Initialize a Bzr control dir object. Only really common logic should reside here, concrete classes should be made with varying behaviours. :param _format: the format that is creating this BzrDir instance. :param _transport: the transport this dir is based at. """ self._format = _format # these are also under the more standard names of # control_transport and user_transport self.transport = _transport.clone('.bzr') self.root_transport = _transport self._mode_check_done = False @property def user_transport(self): return self.root_transport @property def control_transport(self): return self.transport def is_control_filename(self, filename): """True if filename is the name of a path which is reserved for bzrdir's. :param filename: A filename within the root transport of this bzrdir. This is true IF and ONLY IF the filename is part of the namespace reserved for bzr control dirs. 
Currently this is the '.bzr' directory in the root of the root_transport. """ # this might be better on the BzrDirFormat class because it refers to # all the possible bzrdir disk formats. # This method is tested via the workingtree is_control_filename tests- # it was extracted from WorkingTree.is_control_filename. If the method's # contract is extended beyond the current trivial implementation, please # add new tests for it to the appropriate place. return filename == '.bzr' or filename.startswith('.bzr/') def _cloning_metadir(self): """Produce a metadir suitable for cloning with. :returns: (destination_bzrdir_format, source_repository) """ result_format = self._format.__class__() try: try: branch = self.open_branch(ignore_fallbacks=True) source_repository = branch.repository result_format._branch_format = branch._format except errors.NotBranchError: source_branch = None source_repository = self.open_repository() except errors.NoRepositoryPresent: source_repository = None else: # XXX TODO: This isinstance is here because we have not implemented # the fix recommended in bug # 103195 - to delegate this choice the # repository itself. repo_format = source_repository._format if isinstance(repo_format, remote.RemoteRepositoryFormat): source_repository._ensure_real() repo_format = source_repository._real_repository._format result_format.repository_format = repo_format try: # TODO: Couldn't we just probe for the format in these cases, # rather than opening the whole tree? It would be a little # faster. mbp 20070401 tree = self.open_workingtree(recommend_upgrade=False) except (errors.NoWorkingTree, errors.NotLocalUrl): result_format.workingtree_format = None else: result_format.workingtree_format = tree._format.__class__() return result_format, source_repository def cloning_metadir(self, require_stacking=False): """Produce a metadir suitable for cloning or sprouting with. These operations may produce workingtrees (yes, even though they're "cloning" something that doesn't have a tree), so a viable workingtree format must be selected. :require_stacking: If True, non-stackable formats will be upgraded to similar stackable formats. :returns: a ControlDirFormat with all component formats either set appropriately or set to None if that component should not be created. """ format, repository = self._cloning_metadir() if format._workingtree_format is None: # No tree in self. if repository is None: # No repository either return format # We have a repository, so set a working tree? (Why? This seems to # contradict the stated return value in the docstring). tree_format = repository._format._matchingbzrdir.workingtree_format format.workingtree_format = tree_format.__class__() if require_stacking: format.require_stacking() return format def get_branch_transport(self, branch_format, name=None): """Get the transport for use by branch format in this BzrDir. Note that bzr dirs that do not support format strings will raise IncompatibleFormat if the branch format they are given has a format string, and vice versa. If branch_format is None, the transport is returned with no checking. If it is not None, then the returned transport is guaranteed to point to an existing directory ready for use. """ raise NotImplementedError(self.get_branch_transport) def get_repository_transport(self, repository_format): """Get the transport for use by repository format in this BzrDir. 
Note that bzr dirs that do not support format strings will raise IncompatibleFormat if the repository format they are given has a format string, and vice versa. If repository_format is None, the transport is returned with no checking. If it is not None, then the returned transport is guaranteed to point to an existing directory ready for use. """ raise NotImplementedError(self.get_repository_transport) def get_workingtree_transport(self, tree_format): """Get the transport for use by workingtree format in this BzrDir. Note that bzr dirs that do not support format strings will raise IncompatibleFormat if the workingtree format they are given has a format string, and vice versa. If workingtree_format is None, the transport is returned with no checking. If it is not None, then the returned transport is guaranteed to point to an existing directory ready for use. """ raise NotImplementedError(self.get_workingtree_transport) @classmethod def create(cls, base, format=None, possible_transports=None): """Create a new BzrDir at the url 'base'. :param format: If supplied, the format of branch to create. If not supplied, the default is used. :param possible_transports: If supplied, a list of transports that can be reused to share a remote connection. """ if cls is not BzrDir: raise AssertionError("BzrDir.create always creates the " "default format, not one of %r" % cls) return controldir.ControlDir.create(base, format=format, possible_transports=possible_transports) def __repr__(self): return "<%s at %r>" % (self.__class__.__name__, self.user_url) def update_feature_flags(self, updated_flags): """Update the features required by this bzrdir. :param updated_flags: Dictionary mapping feature names to necessities A necessity can be None to indicate the feature should be removed """ self.control_files.lock_write() try: self._format._update_feature_flags(updated_flags) self.transport.put_bytes('branch-format', self._format.as_string()) finally: self.control_files.unlock() class BzrDirMeta1(BzrDir): """A .bzr meta version 1 control object. This is the first control object where the individual aspects are really split out: there are separate repository, workingtree and branch subdirectories and any subset of the three can be present within a BzrDir. """ def _get_branch_path(self, name): """Obtain the branch path to use. This uses the API specified branch name first, and then falls back to the branch name specified in the URL. If neither of those is specified, it uses the default branch. :param name: Optional branch name to use :return: Relative path to branch """ if name == "": return 'branch' return urlutils.join('branches', name.encode("utf-8")) def _read_branch_list(self): """Read the branch list. :return: List of utf-8 encoded branch names. """ try: f = self.control_transport.get('branch-list') except errors.NoSuchFile: return [] ret = [] try: for name in f: ret.append(name.rstrip("\n")) finally: f.close() return ret def _write_branch_list(self, branches): """Write out the branch list. 
:param branches: List of utf-8 branch names to write """ self.transport.put_bytes('branch-list', "".join([name+"\n" for name in branches])) def __init__(self, _transport, _format): super(BzrDirMeta1, self).__init__(_transport, _format) self.control_files = lockable_files.LockableFiles( self.control_transport, self._format._lock_file_name, self._format._lock_class) def can_convert_format(self): """See BzrDir.can_convert_format().""" return True def create_branch(self, name=None, repository=None, append_revisions_only=None): """See ControlDir.create_branch.""" if name is None: name = self._get_selected_branch() return self._format.get_branch_format().initialize(self, name=name, repository=repository, append_revisions_only=append_revisions_only) def destroy_branch(self, name=None): """See ControlDir.destroy_branch.""" if name is None: name = self._get_selected_branch() path = self._get_branch_path(name) if name != "": self.control_files.lock_write() try: branches = self._read_branch_list() try: branches.remove(name.encode("utf-8")) except ValueError: raise errors.NotBranchError(name) self._write_branch_list(branches) finally: self.control_files.unlock() try: self.transport.delete_tree(path) except errors.NoSuchFile: raise errors.NotBranchError(path=urlutils.join(self.transport.base, path), bzrdir=self) def create_repository(self, shared=False): """See BzrDir.create_repository.""" return self._format.repository_format.initialize(self, shared) def destroy_repository(self): """See BzrDir.destroy_repository.""" try: self.transport.delete_tree('repository') except errors.NoSuchFile: raise errors.NoRepositoryPresent(self) def create_workingtree(self, revision_id=None, from_branch=None, accelerator_tree=None, hardlink=False): """See BzrDir.create_workingtree.""" return self._format.workingtree_format.initialize( self, revision_id, from_branch=from_branch, accelerator_tree=accelerator_tree, hardlink=hardlink) def destroy_workingtree(self): """See BzrDir.destroy_workingtree.""" wt = self.open_workingtree(recommend_upgrade=False) repository = wt.branch.repository empty = repository.revision_tree(_mod_revision.NULL_REVISION) # We ignore the conflicts returned by wt.revert since we're about to # delete the wt metadata anyway, all that should be left here are # detritus. But see bug #634470 about subtree .bzr dirs. conflicts = wt.revert(old_tree=empty) self.destroy_workingtree_metadata() def destroy_workingtree_metadata(self): self.transport.delete_tree('checkout') def find_branch_format(self, name=None): """Find the branch 'format' for this bzrdir. This might be a synthetic object for e.g. RemoteBranch and SVN. 
""" from bzrlib.branch import BranchFormatMetadir return BranchFormatMetadir.find_format(self, name=name) def _get_mkdir_mode(self): """Figure out the mode to use when creating a bzrdir subdir.""" temp_control = lockable_files.LockableFiles(self.transport, '', lockable_files.TransportLock) return temp_control._dir_mode def get_branch_reference(self, name=None): """See BzrDir.get_branch_reference().""" from bzrlib.branch import BranchFormatMetadir format = BranchFormatMetadir.find_format(self, name=name) return format.get_reference(self, name=name) def set_branch_reference(self, target_branch, name=None): format = _mod_branch.BranchReferenceFormat() return format.initialize(self, target_branch=target_branch, name=name) def get_branch_transport(self, branch_format, name=None): """See BzrDir.get_branch_transport().""" if name is None: name = self._get_selected_branch() path = self._get_branch_path(name) # XXX: this shouldn't implicitly create the directory if it's just # promising to get a transport -- mbp 20090727 if branch_format is None: return self.transport.clone(path) try: branch_format.get_format_string() except NotImplementedError: raise errors.IncompatibleFormat(branch_format, self._format) if name != "": branches = self._read_branch_list() utf8_name = name.encode("utf-8") if not utf8_name in branches: self.control_files.lock_write() try: branches = self._read_branch_list() dirname = urlutils.dirname(utf8_name) if dirname != "" and dirname in branches: raise errors.ParentBranchExists(name) child_branches = [ b.startswith(utf8_name+"/") for b in branches] if any(child_branches): raise errors.AlreadyBranchError(name) branches.append(utf8_name) self._write_branch_list(branches) finally: self.control_files.unlock() branch_transport = self.transport.clone(path) mode = self._get_mkdir_mode() branch_transport.create_prefix(mode=mode) try: self.transport.mkdir(path, mode=mode) except errors.FileExists: pass return self.transport.clone(path) def get_repository_transport(self, repository_format): """See BzrDir.get_repository_transport().""" if repository_format is None: return self.transport.clone('repository') try: repository_format.get_format_string() except NotImplementedError: raise errors.IncompatibleFormat(repository_format, self._format) try: self.transport.mkdir('repository', mode=self._get_mkdir_mode()) except errors.FileExists: pass return self.transport.clone('repository') def get_workingtree_transport(self, workingtree_format): """See BzrDir.get_workingtree_transport().""" if workingtree_format is None: return self.transport.clone('checkout') try: workingtree_format.get_format_string() except NotImplementedError: raise errors.IncompatibleFormat(workingtree_format, self._format) try: self.transport.mkdir('checkout', mode=self._get_mkdir_mode()) except errors.FileExists: pass return self.transport.clone('checkout') def get_branches(self): """See ControlDir.get_branches.""" ret = {} try: ret[""] = self.open_branch(name="") except (errors.NotBranchError, errors.NoRepositoryPresent): pass for name in self._read_branch_list(): ret[name] = self.open_branch(name=name.decode('utf-8')) return ret def has_workingtree(self): """Tell if this bzrdir contains a working tree. Note: if you're going to open the working tree, you should just go ahead and try, and not ask permission first. 
""" from bzrlib.workingtree import WorkingTreeFormatMetaDir try: WorkingTreeFormatMetaDir.find_format_string(self) except errors.NoWorkingTree: return False return True def needs_format_conversion(self, format): """See BzrDir.needs_format_conversion().""" if (not isinstance(self._format, format.__class__) or self._format.get_format_string() != format.get_format_string()): # it is not a meta dir format, conversion is needed. return True # we might want to push this down to the repository? try: if not isinstance(self.open_repository()._format, format.repository_format.__class__): # the repository needs an upgrade. return True except errors.NoRepositoryPresent: pass for branch in self.list_branches(): if not isinstance(branch._format, format.get_branch_format().__class__): # the branch needs an upgrade. return True try: my_wt = self.open_workingtree(recommend_upgrade=False) if not isinstance(my_wt._format, format.workingtree_format.__class__): # the workingtree needs an upgrade. return True except (errors.NoWorkingTree, errors.NotLocalUrl): pass return False def open_branch(self, name=None, unsupported=False, ignore_fallbacks=False, possible_transports=None): """See ControlDir.open_branch.""" if name is None: name = self._get_selected_branch() format = self.find_branch_format(name=name) format.check_support_status(unsupported) return format.open(self, name=name, _found=True, ignore_fallbacks=ignore_fallbacks, possible_transports=possible_transports) def open_repository(self, unsupported=False): """See BzrDir.open_repository.""" from bzrlib.repository import RepositoryFormatMetaDir format = RepositoryFormatMetaDir.find_format(self) format.check_support_status(unsupported) return format.open(self, _found=True) def open_workingtree(self, unsupported=False, recommend_upgrade=True): """See BzrDir.open_workingtree.""" from bzrlib.workingtree import WorkingTreeFormatMetaDir format = WorkingTreeFormatMetaDir.find_format(self) format.check_support_status(unsupported, recommend_upgrade, basedir=self.root_transport.base) return format.open(self, _found=True) def _get_config(self): return config.TransportConfig(self.transport, 'control.conf') class BzrFormat(object): """Base class for all formats of things living in metadirs. This class manages the format string that is stored in the 'format' or 'branch-format' file. All classes for (branch-, repository-, workingtree-) formats that live in meta directories and have their own 'format' file (i.e. different from .bzr/branch-format) derive from this class, as well as the relevant base class for their kind (BranchFormat, WorkingTreeFormat, RepositoryFormat). Each format is identified by a "format" or "branch-format" file with a single line containing the base format name and then an optional list of feature flags. Feature flags are supported as of bzr 2.5. Setting feature flags on formats will render them inaccessible to older versions of bzr. :ivar features: Dictionary mapping feature names to their necessity """ _present_features = set() def __init__(self): self.features = {} @classmethod def register_feature(cls, name): """Register a feature as being present. 
:param name: Name of the feature """ if " " in name: raise ValueError("spaces are not allowed in feature names") if name in cls._present_features: raise errors.FeatureAlreadyRegistered(name) cls._present_features.add(name) @classmethod def unregister_feature(cls, name): """Unregister a feature.""" cls._present_features.remove(name) def check_support_status(self, allow_unsupported, recommend_upgrade=True, basedir=None): for name, necessity in self.features.iteritems(): if name in self._present_features: continue if necessity == "optional": mutter("ignoring optional missing feature %s", name) continue elif necessity == "required": raise errors.MissingFeature(name) else: mutter("treating unknown necessity as require for %s", name) raise errors.MissingFeature(name) @classmethod def get_format_string(cls): """Return the ASCII format string that identifies this format.""" raise NotImplementedError(cls.get_format_string) @classmethod def from_string(cls, text): format_string = cls.get_format_string() if not text.startswith(format_string): raise AssertionError("Invalid format header %r for %r" % (text, cls)) lines = text[len(format_string):].splitlines() ret = cls() for lineno, line in enumerate(lines): try: (necessity, feature) = line.split(" ", 1) except ValueError: raise errors.ParseFormatError(format=cls, lineno=lineno+2, line=line, text=text) ret.features[feature] = necessity return ret def as_string(self): """Return the string representation of this format. """ lines = [self.get_format_string()] lines.extend([("%s %s\n" % (item[1], item[0])) for item in self.features.iteritems()]) return "".join(lines) @classmethod def _find_format(klass, registry, kind, format_string): try: first_line = format_string[:format_string.index("\n")+1] except ValueError: first_line = format_string try: cls = registry.get(first_line) except KeyError: raise errors.UnknownFormatError(format=first_line, kind=kind) return cls.from_string(format_string) def network_name(self): """A simple byte string uniquely identifying this format for RPC calls. Metadir branch formats use their format string. """ return self.as_string() def __eq__(self, other): return (self.__class__ is other.__class__ and self.features == other.features) def _update_feature_flags(self, updated_flags): """Update the feature flags in this format. 
:param updated_flags: Updated feature flags """ for name, necessity in updated_flags.iteritems(): if necessity is None: try: del self.features[name] except KeyError: pass else: self.features[name] = necessity class BzrProber(controldir.Prober): """Prober for formats that use a .bzr/ control directory.""" formats = registry.FormatRegistry(controldir.network_format_registry) """The known .bzr formats.""" @classmethod def probe_transport(klass, transport): """Return the .bzrdir style format present in a directory.""" try: format_string = transport.get_bytes(".bzr/branch-format") except errors.NoSuchFile: raise errors.NotBranchError(path=transport.base) try: first_line = format_string[:format_string.index("\n")+1] except ValueError: first_line = format_string try: cls = klass.formats.get(first_line) except KeyError: raise errors.UnknownFormatError(format=first_line, kind='bzrdir') return cls.from_string(format_string) @classmethod def known_formats(cls): result = set() for name, format in cls.formats.iteritems(): if callable(format): format = format() result.add(format) return result controldir.ControlDirFormat.register_prober(BzrProber) class RemoteBzrProber(controldir.Prober): """Prober for remote servers that provide a Bazaar smart server.""" @classmethod def probe_transport(klass, transport): """Return a RemoteBzrDirFormat object if it looks possible.""" try: medium = transport.get_smart_medium() except (NotImplementedError, AttributeError, errors.TransportNotPossible, errors.NoSmartMedium, errors.SmartProtocolError): # no smart server, so not a branch for this format type. raise errors.NotBranchError(path=transport.base) else: # Decline to open it if the server doesn't support our required # version (3) so that the VFS-based transport will do it. if medium.should_probe(): try: server_version = medium.protocol_version() except errors.SmartProtocolError: # Apparently there's no usable smart server there, even though # the medium supports the smart protocol. raise errors.NotBranchError(path=transport.base) if server_version != '2': raise errors.NotBranchError(path=transport.base) from bzrlib.remote import RemoteBzrDirFormat return RemoteBzrDirFormat() @classmethod def known_formats(cls): from bzrlib.remote import RemoteBzrDirFormat return set([RemoteBzrDirFormat()]) class BzrDirFormat(BzrFormat, controldir.ControlDirFormat): """ControlDirFormat base class for .bzr/ directories. Formats are placed in a dict by their format string for reference during bzrdir opening. These should be subclasses of BzrDirFormat for consistency. Once a format is deprecated, just deprecate the initialize and open methods on the format class. Do not deprecate the object, as the object will be created every system load. """ _lock_file_name = 'branch-lock' # _lock_class must be set in subclasses to the lock type, typ. # TransportLock or LockDir def initialize_on_transport(self, transport): """Initialize a new bzrdir in the base directory of a Transport.""" try: # can we hand off the request to the smart server rather than using # vfs calls? 
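# Descriptive note on the hand-off below: get_smart_medium() raises
# errors.NoSmartMedium when the transport has no smart server, and the
# except clause then falls back to plain VFS creation. Even when a smart
# medium is available, only BzrDirMetaFormat1 is created via the remote
# RPC path; any other requested format is also routed through
# _initialize_on_transport_vfs().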
client_medium = transport.get_smart_medium() except errors.NoSmartMedium: return self._initialize_on_transport_vfs(transport) else: # Current RPC's only know how to create bzr metadir1 instances, so # we still delegate to vfs methods if the requested format is not a # metadir1 if type(self) != BzrDirMetaFormat1: return self._initialize_on_transport_vfs(transport) from bzrlib.remote import RemoteBzrDirFormat remote_format = RemoteBzrDirFormat() self._supply_sub_formats_to(remote_format) return remote_format.initialize_on_transport(transport) def initialize_on_transport_ex(self, transport, use_existing_dir=False, create_prefix=False, force_new_repo=False, stacked_on=None, stack_on_pwd=None, repo_format_name=None, make_working_trees=None, shared_repo=False, vfs_only=False): """Create this format on transport. The directory to initialize will be created. :param force_new_repo: Do not use a shared repository for the target, even if one is available. :param create_prefix: Create any missing directories leading up to to_transport. :param use_existing_dir: Use an existing directory if one exists. :param stacked_on: A url to stack any created branch on, None to follow any target stacking policy. :param stack_on_pwd: If stack_on is relative, the location it is relative to. :param repo_format_name: If non-None, a repository will be made-or-found. Should none be found, or if force_new_repo is True the repo_format_name is used to select the format of repository to create. :param make_working_trees: Control the setting of make_working_trees for a new shared repository when one is made. None to use whatever default the format has. :param shared_repo: Control whether made repositories are shared or not. :param vfs_only: If True do not attempt to use a smart server :return: repo, controldir, require_stacking, repository_policy. repo is None if none was created or found, bzrdir is always valid. require_stacking is the result of examining the stacked_on parameter and any stacking policy found for the target. """ if not vfs_only: # Try to hand off to a smart server try: client_medium = transport.get_smart_medium() except errors.NoSmartMedium: pass else: from bzrlib.remote import RemoteBzrDirFormat # TODO: lookup the local format from a server hint. remote_dir_format = RemoteBzrDirFormat() remote_dir_format._network_name = self.network_name() self._supply_sub_formats_to(remote_dir_format) return remote_dir_format.initialize_on_transport_ex(transport, use_existing_dir=use_existing_dir, create_prefix=create_prefix, force_new_repo=force_new_repo, stacked_on=stacked_on, stack_on_pwd=stack_on_pwd, repo_format_name=repo_format_name, make_working_trees=make_working_trees, shared_repo=shared_repo) # XXX: Refactor the create_prefix/no_create_prefix code into a # common helper function # The destination may not exist - if so make it according to policy. def make_directory(transport): transport.mkdir('.') return transport def redirected(transport, e, redirection_notice): note(redirection_notice) return transport._redirected_to(e.source, e.target) try: transport = do_catching_redirections(make_directory, transport, redirected) except errors.FileExists: if not use_existing_dir: raise except errors.NoSuchFile: if not create_prefix: raise transport.create_prefix() require_stacking = (stacked_on is not None) # Now the target directory exists, but doesn't have a .bzr # directory. So we need to create it, along with any work to create # all of the dependent branches, etc. 
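# Roadmap for the rest of this method: create the control directory via
# initialize_on_transport(), optionally override its repository format
# when repo_format_name was given, then let determine_repository_policy()
# choose a policy and acquire the repository (returned locked for write)
# if one was requested; the policy may also upgrade require_stacking.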
result = self.initialize_on_transport(transport) if repo_format_name: try: # use a custom format result._format.repository_format = \ repository.network_format_registry.get(repo_format_name) except AttributeError: # The format didn't permit it to be set. pass # A repository is desired, either in-place or shared. repository_policy = result.determine_repository_policy( force_new_repo, stacked_on, stack_on_pwd, require_stacking=require_stacking) result_repo, is_new_repo = repository_policy.acquire_repository( make_working_trees, shared_repo) if not require_stacking and repository_policy._require_stacking: require_stacking = True result._format.require_stacking() result_repo.lock_write() else: result_repo = None repository_policy = None return result_repo, result, require_stacking, repository_policy def _initialize_on_transport_vfs(self, transport): """Initialize a new bzrdir using VFS calls. :param transport: The transport to create the .bzr directory in. :return: A """ # Since we are creating a .bzr directory, inherit the # mode from the root directory temp_control = lockable_files.LockableFiles(transport, '', lockable_files.TransportLock) try: temp_control._transport.mkdir('.bzr', # FIXME: RBC 20060121 don't peek under # the covers mode=temp_control._dir_mode) except errors.FileExists: raise errors.AlreadyControlDirError(transport.base) if sys.platform == 'win32' and isinstance(transport, local.LocalTransport): win32utils.set_file_attr_hidden(transport._abspath('.bzr')) file_mode = temp_control._file_mode del temp_control bzrdir_transport = transport.clone('.bzr') utf8_files = [('README', "This is a Bazaar control directory.\n" "Do not change any files in this directory.\n" "See http://bazaar.canonical.com/ for more information about Bazaar.\n"), ('branch-format', self.as_string()), ] # NB: no need to escape relative paths that are url safe. control_files = lockable_files.LockableFiles(bzrdir_transport, self._lock_file_name, self._lock_class) control_files.create_lock() control_files.lock_write() try: for (filename, content) in utf8_files: bzrdir_transport.put_bytes(filename, content, mode=file_mode) finally: control_files.unlock() return self.open(transport, _found=True) def open(self, transport, _found=False): """Return an instance of this format for the dir transport points at. _found is a private parameter, do not use it. """ if not _found: found_format = controldir.ControlDirFormat.find_format(transport) if not isinstance(found_format, self.__class__): raise AssertionError("%s was asked to open %s, but it seems to need " "format %s" % (self, transport, found_format)) # Allow subclasses - use the found format. self._supply_sub_formats_to(found_format) return found_format._open(transport) return self._open(transport) def _open(self, transport): """Template method helper for opening BzrDirectories. This performs the actual open and any additional logic or parameter passing. """ raise NotImplementedError(self._open) def _supply_sub_formats_to(self, other_format): """Give other_format the same values for sub formats as this has. This method is expected to be used when parameterising a RemoteBzrDirFormat instance with the parameters from a BzrDirMetaFormat1 instance. :param other_format: other_format is a format which should be compatible with whatever sub formats are supported by self. :return: None. 
""" other_format.features = dict(self.features) def supports_transport(self, transport): # bzr formats can be opened over all known transports return True def check_support_status(self, allow_unsupported, recommend_upgrade=True, basedir=None): controldir.ControlDirFormat.check_support_status(self, allow_unsupported=allow_unsupported, recommend_upgrade=recommend_upgrade, basedir=basedir) BzrFormat.check_support_status(self, allow_unsupported=allow_unsupported, recommend_upgrade=recommend_upgrade, basedir=basedir) class BzrDirMetaFormat1(BzrDirFormat): """Bzr meta control format 1 This is the first format with split out working tree, branch and repository disk storage. It has: - Format 3 working trees [optional] - Format 5 branches [optional] - Format 7 repositories [optional] """ _lock_class = lockdir.LockDir fixed_components = False colocated_branches = True def __init__(self): BzrDirFormat.__init__(self) self._workingtree_format = None self._branch_format = None self._repository_format = None def __eq__(self, other): if other.__class__ is not self.__class__: return False if other.repository_format != self.repository_format: return False if other.workingtree_format != self.workingtree_format: return False if other.features != self.features: return False return True def __ne__(self, other): return not self == other def get_branch_format(self): if self._branch_format is None: from bzrlib.branch import format_registry as branch_format_registry self._branch_format = branch_format_registry.get_default() return self._branch_format def set_branch_format(self, format): self._branch_format = format def require_stacking(self, stack_on=None, possible_transports=None, _skip_repo=False): """We have a request to stack, try to ensure the formats support it. :param stack_on: If supplied, it is the URL to a branch that we want to stack on. Check to see if that format supports stacking before forcing an upgrade. """ # Stacking is desired. requested by the target, but does the place it # points at support stacking? If it doesn't then we should # not implicitly upgrade. We check this here. new_repo_format = None new_branch_format = None # a bit of state for get_target_branch so that we don't try to open it # 2 times, for both repo *and* branch target = [None, False, None] # target_branch, checked, upgrade anyway def get_target_branch(): if target[1]: # We've checked, don't check again return target if stack_on is None: # No target format, that means we want to force upgrading target[:] = [None, True, True] return target try: target_dir = BzrDir.open(stack_on, possible_transports=possible_transports) except errors.NotBranchError: # Nothing there, don't change formats target[:] = [None, True, False] return target except errors.JailBreak: # JailBreak, JFDI and upgrade anyway target[:] = [None, True, True] return target try: target_branch = target_dir.open_branch() except errors.NotBranchError: # No branch, don't upgrade formats target[:] = [None, True, False] return target target[:] = [target_branch, True, False] return target if (not _skip_repo and not self.repository_format.supports_external_lookups): # We need to upgrade the Repository. target_branch, _, do_upgrade = get_target_branch() if target_branch is None: # We don't have a target branch, should we upgrade anyway? if do_upgrade: # stack_on is inaccessible, JFDI. # TODO: bad monkey, hard-coded formats... 
if self.repository_format.rich_root_data: new_repo_format = knitpack_repo.RepositoryFormatKnitPack5RichRoot() else: new_repo_format = knitpack_repo.RepositoryFormatKnitPack5() else: # If the target already supports stacking, then we know the # project is already able to use stacking, so auto-upgrade # for them new_repo_format = target_branch.repository._format if not new_repo_format.supports_external_lookups: # target doesn't, source doesn't, so don't auto upgrade # repo new_repo_format = None if new_repo_format is not None: self.repository_format = new_repo_format note(gettext('Source repository format does not support stacking,' ' using format:\n %s'), new_repo_format.get_format_description()) if not self.get_branch_format().supports_stacking(): # We just checked the repo, now lets check if we need to # upgrade the branch format target_branch, _, do_upgrade = get_target_branch() if target_branch is None: if do_upgrade: # TODO: bad monkey, hard-coded formats... from bzrlib.branch import BzrBranchFormat7 new_branch_format = BzrBranchFormat7() else: new_branch_format = target_branch._format if not new_branch_format.supports_stacking(): new_branch_format = None if new_branch_format is not None: # Does support stacking, use its format. self.set_branch_format(new_branch_format) note(gettext('Source branch format does not support stacking,' ' using format:\n %s'), new_branch_format.get_format_description()) def get_converter(self, format=None): """See BzrDirFormat.get_converter().""" if format is None: format = BzrDirFormat.get_default_format() if (type(self) is BzrDirMetaFormat1 and type(format) is BzrDirMetaFormat1Colo): return ConvertMetaToColo(format) if (type(self) is BzrDirMetaFormat1Colo and type(format) is BzrDirMetaFormat1): return ConvertMetaToColo(format) if not isinstance(self, format.__class__): # converting away from metadir is not implemented raise NotImplementedError(self.get_converter) return ConvertMetaToMeta(format) @classmethod def get_format_string(cls): """See BzrDirFormat.get_format_string().""" return "Bazaar-NG meta directory, format 1\n" def get_format_description(self): """See BzrDirFormat.get_format_description().""" return "Meta directory format 1" def _open(self, transport): """See BzrDirFormat._open.""" # Create a new format instance because otherwise initialisation of new # metadirs share the global default format object leading to alias # problems. format = BzrDirMetaFormat1() self._supply_sub_formats_to(format) return BzrDirMeta1(transport, format) def __return_repository_format(self): """Circular import protection.""" if self._repository_format: return self._repository_format from bzrlib.repository import format_registry return format_registry.get_default() def _set_repository_format(self, value): """Allow changing the repository format for metadir formats.""" self._repository_format = value repository_format = property(__return_repository_format, _set_repository_format) def _supply_sub_formats_to(self, other_format): """Give other_format the same values for sub formats as this has. This method is expected to be used when parameterising a RemoteBzrDirFormat instance with the parameters from a BzrDirMetaFormat1 instance. :param other_format: other_format is a format which should be compatible with whatever sub formats are supported by self. :return: None. 
""" super(BzrDirMetaFormat1, self)._supply_sub_formats_to(other_format) if getattr(self, '_repository_format', None) is not None: other_format.repository_format = self.repository_format if self._branch_format is not None: other_format._branch_format = self._branch_format if self._workingtree_format is not None: other_format.workingtree_format = self.workingtree_format def __get_workingtree_format(self): if self._workingtree_format is None: from bzrlib.workingtree import ( format_registry as wt_format_registry, ) self._workingtree_format = wt_format_registry.get_default() return self._workingtree_format def __set_workingtree_format(self, wt_format): self._workingtree_format = wt_format def __repr__(self): return "<%r>" % (self.__class__.__name__,) workingtree_format = property(__get_workingtree_format, __set_workingtree_format) # Register bzr formats BzrProber.formats.register(BzrDirMetaFormat1.get_format_string(), BzrDirMetaFormat1) controldir.ControlDirFormat._default_format = BzrDirMetaFormat1() class BzrDirMetaFormat1Colo(BzrDirMetaFormat1): """BzrDirMeta1 format with support for colocated branches.""" colocated_branches = True @classmethod def get_format_string(cls): """See BzrDirFormat.get_format_string().""" return "Bazaar meta directory, format 1 (with colocated branches)\n" def get_format_description(self): """See BzrDirFormat.get_format_description().""" return "Meta directory format 1 with support for colocated branches" def _open(self, transport): """See BzrDirFormat._open.""" # Create a new format instance because otherwise initialisation of new # metadirs share the global default format object leading to alias # problems. format = BzrDirMetaFormat1Colo() self._supply_sub_formats_to(format) return BzrDirMeta1(transport, format) BzrProber.formats.register(BzrDirMetaFormat1Colo.get_format_string(), BzrDirMetaFormat1Colo) class ConvertMetaToMeta(controldir.Converter): """Converts the components of metadirs.""" def __init__(self, target_format): """Create a metadir to metadir converter. :param target_format: The final metadir format that is desired. """ self.target_format = target_format def convert(self, to_convert, pb): """See Converter.convert().""" self.bzrdir = to_convert self.pb = ui.ui_factory.nested_progress_bar() self.count = 0 self.total = 1 self.step('checking repository format') try: repo = self.bzrdir.open_repository() except errors.NoRepositoryPresent: pass else: if not isinstance(repo._format, self.target_format.repository_format.__class__): from bzrlib.repository import CopyConverter ui.ui_factory.note(gettext('starting repository conversion')) converter = CopyConverter(self.target_format.repository_format) converter.convert(repo, pb) for branch in self.bzrdir.list_branches(): # TODO: conversions of Branch and Tree should be done by # InterXFormat lookups/some sort of registry. 
# Avoid circular imports old = branch._format.__class__ new = self.target_format.get_branch_format().__class__ while old != new: if (old == fullhistorybranch.BzrBranchFormat5 and new in (_mod_branch.BzrBranchFormat6, _mod_branch.BzrBranchFormat7, _mod_branch.BzrBranchFormat8)): branch_converter = _mod_branch.Converter5to6() elif (old == _mod_branch.BzrBranchFormat6 and new in (_mod_branch.BzrBranchFormat7, _mod_branch.BzrBranchFormat8)): branch_converter = _mod_branch.Converter6to7() elif (old == _mod_branch.BzrBranchFormat7 and new is _mod_branch.BzrBranchFormat8): branch_converter = _mod_branch.Converter7to8() else: raise errors.BadConversionTarget("No converter", new, branch._format) branch_converter.convert(branch) branch = self.bzrdir.open_branch() old = branch._format.__class__ try: tree = self.bzrdir.open_workingtree(recommend_upgrade=False) except (errors.NoWorkingTree, errors.NotLocalUrl): pass else: # TODO: conversions of Branch and Tree should be done by # InterXFormat lookups if (isinstance(tree, workingtree_3.WorkingTree3) and not isinstance(tree, workingtree_4.DirStateWorkingTree) and isinstance(self.target_format.workingtree_format, workingtree_4.DirStateWorkingTreeFormat)): workingtree_4.Converter3to4().convert(tree) if (isinstance(tree, workingtree_4.DirStateWorkingTree) and not isinstance(tree, workingtree_4.WorkingTree5) and isinstance(self.target_format.workingtree_format, workingtree_4.WorkingTreeFormat5)): workingtree_4.Converter4to5().convert(tree) if (isinstance(tree, workingtree_4.DirStateWorkingTree) and not isinstance(tree, workingtree_4.WorkingTree6) and isinstance(self.target_format.workingtree_format, workingtree_4.WorkingTreeFormat6)): workingtree_4.Converter4or5to6().convert(tree) self.pb.finished() return to_convert class ConvertMetaToColo(controldir.Converter): """Add colocated branch support.""" def __init__(self, target_format): """Create a converter.that upgrades a metadir to the colo format. :param target_format: The final metadir format that is desired. """ self.target_format = target_format def convert(self, to_convert, pb): """See Converter.convert().""" to_convert.transport.put_bytes('branch-format', self.target_format.as_string()) return BzrDir.open_from_transport(to_convert.root_transport) class ConvertMetaToColo(controldir.Converter): """Convert a 'development-colo' bzrdir to a '2a' bzrdir.""" def __init__(self, target_format): """Create a converter that converts a 'development-colo' metadir to a '2a' metadir. :param target_format: The final metadir format that is desired. """ self.target_format = target_format def convert(self, to_convert, pb): """See Converter.convert().""" to_convert.transport.put_bytes('branch-format', self.target_format.as_string()) return BzrDir.open_from_transport(to_convert.root_transport) controldir.ControlDirFormat.register_server_prober(RemoteBzrProber) class RepositoryAcquisitionPolicy(object): """Abstract base class for repository acquisition policies. A repository acquisition policy decides how a BzrDir acquires a repository for a branch that is being created. The most basic policy decision is whether to create a new repository or use an existing one. """ def __init__(self, stack_on, stack_on_pwd, require_stacking): """Constructor. :param stack_on: A location to stack on :param stack_on_pwd: If stack_on is relative, the location it is relative to. :param require_stacking: If True, it is a failure to not stack. 
""" self._stack_on = stack_on self._stack_on_pwd = stack_on_pwd self._require_stacking = require_stacking def configure_branch(self, branch): """Apply any configuration data from this policy to the branch. Default implementation sets repository stacking. """ if self._stack_on is None: return if self._stack_on_pwd is None: stack_on = self._stack_on else: try: stack_on = urlutils.rebase_url(self._stack_on, self._stack_on_pwd, branch.user_url) except errors.InvalidRebaseURLs: stack_on = self._get_full_stack_on() try: branch.set_stacked_on_url(stack_on) except (errors.UnstackableBranchFormat, errors.UnstackableRepositoryFormat): if self._require_stacking: raise def requires_stacking(self): """Return True if this policy requires stacking.""" return self._stack_on is not None and self._require_stacking def _get_full_stack_on(self): """Get a fully-qualified URL for the stack_on location.""" if self._stack_on is None: return None if self._stack_on_pwd is None: return self._stack_on else: return urlutils.join(self._stack_on_pwd, self._stack_on) def _add_fallback(self, repository, possible_transports=None): """Add a fallback to the supplied repository, if stacking is set.""" stack_on = self._get_full_stack_on() if stack_on is None: return try: stacked_dir = BzrDir.open(stack_on, possible_transports=possible_transports) except errors.JailBreak: # We keep the stacking details, but we are in the server code so # actually stacking is not needed. return try: stacked_repo = stacked_dir.open_branch().repository except errors.NotBranchError: stacked_repo = stacked_dir.open_repository() try: repository.add_fallback_repository(stacked_repo) except errors.UnstackableRepositoryFormat: if self._require_stacking: raise else: self._require_stacking = True def acquire_repository(self, make_working_trees=None, shared=False, possible_transports=None): """Acquire a repository for this bzrdir. Implementations may create a new repository or use a pre-exising repository. :param make_working_trees: If creating a repository, set make_working_trees to this value (if non-None) :param shared: If creating a repository, make it shared if True :return: A repository, is_new_flag (True if the repository was created). """ raise NotImplementedError(RepositoryAcquisitionPolicy.acquire_repository) class CreateRepository(RepositoryAcquisitionPolicy): """A policy of creating a new repository""" def __init__(self, bzrdir, stack_on=None, stack_on_pwd=None, require_stacking=False): """Constructor. :param bzrdir: The bzrdir to create the repository on. :param stack_on: A location to stack on :param stack_on_pwd: If stack_on is relative, the location it is relative to. """ RepositoryAcquisitionPolicy.__init__(self, stack_on, stack_on_pwd, require_stacking) self._bzrdir = bzrdir def acquire_repository(self, make_working_trees=None, shared=False, possible_transports=None): """Implementation of RepositoryAcquisitionPolicy.acquire_repository Creates the desired repository in the bzrdir we already have. """ if possible_transports is None: possible_transports = [] else: possible_transports = list(possible_transports) possible_transports.append(self._bzrdir.root_transport) stack_on = self._get_full_stack_on() if stack_on: format = self._bzrdir._format format.require_stacking(stack_on=stack_on, possible_transports=possible_transports) if not self._require_stacking: # We have picked up automatic stacking somewhere. 
note(gettext('Using default stacking branch {0} at {1}').format( self._stack_on, self._stack_on_pwd)) repository = self._bzrdir.create_repository(shared=shared) self._add_fallback(repository, possible_transports=possible_transports) if make_working_trees is not None: repository.set_make_working_trees(make_working_trees) return repository, True class UseExistingRepository(RepositoryAcquisitionPolicy): """A policy of reusing an existing repository""" def __init__(self, repository, stack_on=None, stack_on_pwd=None, require_stacking=False): """Constructor. :param repository: The repository to use. :param stack_on: A location to stack on :param stack_on_pwd: If stack_on is relative, the location it is relative to. """ RepositoryAcquisitionPolicy.__init__(self, stack_on, stack_on_pwd, require_stacking) self._repository = repository def acquire_repository(self, make_working_trees=None, shared=False, possible_transports=None): """Implementation of RepositoryAcquisitionPolicy.acquire_repository Returns an existing repository to use. """ if possible_transports is None: possible_transports = [] else: possible_transports = list(possible_transports) possible_transports.append(self._repository.bzrdir.transport) self._add_fallback(self._repository, possible_transports=possible_transports) return self._repository, False def register_metadir(registry, key, repository_format, help, native=True, deprecated=False, branch_format=None, tree_format=None, hidden=False, experimental=False, alias=False, bzrdir_format=None): """Register a metadir subformat. These all use a meta bzrdir, but can be parameterized by the Repository/Branch/WorkingTreeformats. :param repository_format: The fully-qualified repository format class name as a string. :param branch_format: Fully-qualified branch format class name as a string. :param tree_format: Fully-qualified tree format class name as a string. """ if bzrdir_format is None: bzrdir_format = BzrDirMetaFormat1 # This should be expanded to support setting WorkingTree and Branch # formats, once the API supports that. def _load(full_name): mod_name, factory_name = full_name.rsplit('.', 1) try: factory = pyutils.get_named_object(mod_name, factory_name) except ImportError, e: raise ImportError('failed to load %s: %s' % (full_name, e)) except AttributeError: raise AttributeError('no factory %s in module %r' % (full_name, sys.modules[mod_name])) return factory() def helper(): bd = bzrdir_format() if branch_format is not None: bd.set_branch_format(_load(branch_format)) if tree_format is not None: bd.workingtree_format = _load(tree_format) if repository_format is not None: bd.repository_format = _load(repository_format) return bd registry.register(key, helper, help, native, deprecated, hidden, experimental, alias) register_metadir(controldir.format_registry, 'knit', 'bzrlib.repofmt.knitrepo.RepositoryFormatKnit1', 'Format using knits. Recommended for interoperation with bzr <= 0.14.', branch_format='bzrlib.branchfmt.fullhistory.BzrBranchFormat5', tree_format='bzrlib.workingtree_3.WorkingTreeFormat3', hidden=True, deprecated=True) register_metadir(controldir.format_registry, 'dirstate', 'bzrlib.repofmt.knitrepo.RepositoryFormatKnit1', help='Format using dirstate for working trees. ' 'Compatible with bzr 0.8 and ' 'above when accessed over the network. 
Introduced in bzr 0.15.', branch_format='bzrlib.branchfmt.fullhistory.BzrBranchFormat5', tree_format='bzrlib.workingtree_4.WorkingTreeFormat4', hidden=True, deprecated=True) register_metadir(controldir.format_registry, 'dirstate-tags', 'bzrlib.repofmt.knitrepo.RepositoryFormatKnit1', help='Variant of dirstate with support for tags. ' 'Introduced in bzr 0.15.', branch_format='bzrlib.branch.BzrBranchFormat6', tree_format='bzrlib.workingtree_4.WorkingTreeFormat4', hidden=True, deprecated=True) register_metadir(controldir.format_registry, 'rich-root', 'bzrlib.repofmt.knitrepo.RepositoryFormatKnit4', help='Variant of dirstate with better handling of tree roots. ' 'Introduced in bzr 1.0', branch_format='bzrlib.branch.BzrBranchFormat6', tree_format='bzrlib.workingtree_4.WorkingTreeFormat4', hidden=True, deprecated=True) register_metadir(controldir.format_registry, 'dirstate-with-subtree', 'bzrlib.repofmt.knitrepo.RepositoryFormatKnit3', help='Variant of dirstate with support for nested trees. ' 'Introduced in 0.15.', branch_format='bzrlib.branch.BzrBranchFormat6', tree_format='bzrlib.workingtree_4.WorkingTreeFormat4', experimental=True, hidden=True, ) register_metadir(controldir.format_registry, 'pack-0.92', 'bzrlib.repofmt.knitpack_repo.RepositoryFormatKnitPack1', help='Pack-based format used in 1.x series. Introduced in 0.92. ' 'Interoperates with bzr repositories before 0.92 but cannot be ' 'read by bzr < 0.92. ' , branch_format='bzrlib.branch.BzrBranchFormat6', tree_format='bzrlib.workingtree_4.WorkingTreeFormat4', deprecated=True, ) register_metadir(controldir.format_registry, 'pack-0.92-subtree', 'bzrlib.repofmt.knitpack_repo.RepositoryFormatKnitPack3', help='Pack-based format used in 1.x series, with subtree support. ' 'Introduced in 0.92. Interoperates with ' 'bzr repositories before 0.92 but cannot be read by bzr < 0.92. ' , branch_format='bzrlib.branch.BzrBranchFormat6', tree_format='bzrlib.workingtree_4.WorkingTreeFormat4', hidden=True, deprecated=True, experimental=True, ) register_metadir(controldir.format_registry, 'rich-root-pack', 'bzrlib.repofmt.knitpack_repo.RepositoryFormatKnitPack4', help='A variant of pack-0.92 that supports rich-root data ' '(needed for bzr-svn and bzr-git). Introduced in 1.0.', branch_format='bzrlib.branch.BzrBranchFormat6', tree_format='bzrlib.workingtree_4.WorkingTreeFormat4', hidden=True, deprecated=True, ) register_metadir(controldir.format_registry, '1.6', 'bzrlib.repofmt.knitpack_repo.RepositoryFormatKnitPack5', help='A format that allows a branch to indicate that there is another ' '(stacked) repository that should be used to access data that is ' 'not present locally.', branch_format='bzrlib.branch.BzrBranchFormat7', tree_format='bzrlib.workingtree_4.WorkingTreeFormat4', hidden=True, deprecated=True, ) register_metadir(controldir.format_registry, '1.6.1-rich-root', 'bzrlib.repofmt.knitpack_repo.RepositoryFormatKnitPack5RichRoot', help='A variant of 1.6 that supports rich-root data ' '(needed for bzr-svn and bzr-git).', branch_format='bzrlib.branch.BzrBranchFormat7', tree_format='bzrlib.workingtree_4.WorkingTreeFormat4', hidden=True, deprecated=True, ) register_metadir(controldir.format_registry, '1.9', 'bzrlib.repofmt.knitpack_repo.RepositoryFormatKnitPack6', help='A repository format using B+tree indexes. 
These indexes ' 'are smaller in size, have smarter caching and provide faster ' 'performance for most operations.', branch_format='bzrlib.branch.BzrBranchFormat7', tree_format='bzrlib.workingtree_4.WorkingTreeFormat4', hidden=True, deprecated=True, ) register_metadir(controldir.format_registry, '1.9-rich-root', 'bzrlib.repofmt.knitpack_repo.RepositoryFormatKnitPack6RichRoot', help='A variant of 1.9 that supports rich-root data ' '(needed for bzr-svn and bzr-git).', branch_format='bzrlib.branch.BzrBranchFormat7', tree_format='bzrlib.workingtree_4.WorkingTreeFormat4', hidden=True, deprecated=True, ) register_metadir(controldir.format_registry, '1.14', 'bzrlib.repofmt.knitpack_repo.RepositoryFormatKnitPack6', help='A working-tree format that supports content filtering.', branch_format='bzrlib.branch.BzrBranchFormat7', tree_format='bzrlib.workingtree_4.WorkingTreeFormat5', hidden=True, deprecated=True, ) register_metadir(controldir.format_registry, '1.14-rich-root', 'bzrlib.repofmt.knitpack_repo.RepositoryFormatKnitPack6RichRoot', help='A variant of 1.14 that supports rich-root data ' '(needed for bzr-svn and bzr-git).', branch_format='bzrlib.branch.BzrBranchFormat7', tree_format='bzrlib.workingtree_4.WorkingTreeFormat5', hidden=True, deprecated=True, ) # The following un-numbered 'development' formats should always just be aliases. register_metadir(controldir.format_registry, 'development-subtree', 'bzrlib.repofmt.groupcompress_repo.RepositoryFormat2aSubtree', help='Current development format, subtree variant. Can convert data to and ' 'from pack-0.92-subtree (and anything compatible with ' 'pack-0.92-subtree) format repositories. Repositories and branches in ' 'this format can only be read by bzr.dev. Please read ' 'http://doc.bazaar.canonical.com/latest/developers/development-repo.html ' 'before use.', branch_format='bzrlib.branch.BzrBranchFormat7', tree_format='bzrlib.workingtree_4.WorkingTreeFormat6', experimental=True, hidden=True, alias=False, # Restore to being an alias when an actual development subtree format is added # This current non-alias status is simply because we did not introduce a # chk based subtree format. ) register_metadir(controldir.format_registry, 'development5-subtree', 'bzrlib.repofmt.knitpack_repo.RepositoryFormatPackDevelopment2Subtree', help='Development format, subtree variant. Can convert data to and ' 'from pack-0.92-subtree (and anything compatible with ' 'pack-0.92-subtree) format repositories. Repositories and branches in ' 'this format can only be read by bzr.dev. Please read ' 'http://doc.bazaar.canonical.com/latest/developers/development-repo.html ' 'before use.', branch_format='bzrlib.branch.BzrBranchFormat7', tree_format='bzrlib.workingtree_4.WorkingTreeFormat6', experimental=True, hidden=True, alias=False, ) register_metadir(controldir.format_registry, 'development-colo', 'bzrlib.repofmt.groupcompress_repo.RepositoryFormat2a', help='The 2a format with experimental support for colocated branches.\n', branch_format='bzrlib.branch.BzrBranchFormat7', tree_format='bzrlib.workingtree_4.WorkingTreeFormat6', experimental=True, bzrdir_format=BzrDirMetaFormat1Colo, ) # And the development formats above will have aliased one of the following: # Finally, the current format. 
register_metadir(controldir.format_registry, '2a', 'bzrlib.repofmt.groupcompress_repo.RepositoryFormat2a', help='Format for the bzr 2.0 series.\n' 'Uses group-compress storage.\n' 'Provides rich roots which are a one-way transition.\n', # 'storage in packs, 255-way hashed CHK inventory, bencode revision, group compress, ' # 'rich roots. Supported by bzr 1.16 and later.', branch_format='bzrlib.branch.BzrBranchFormat7', tree_format='bzrlib.workingtree_4.WorkingTreeFormat6', experimental=False, ) # The following format should be an alias for the rich root equivalent # of the default format register_metadir(controldir.format_registry, 'default-rich-root', 'bzrlib.repofmt.groupcompress_repo.RepositoryFormat2a', branch_format='bzrlib.branch.BzrBranchFormat7', tree_format='bzrlib.workingtree_4.WorkingTreeFormat6', alias=True, hidden=True, help='Same as 2a.') # The current format that is made on 'bzr init'. format_name = config.GlobalStack().get('default_format') controldir.format_registry.set_default(format_name) # XXX 2010-08-20 JRV: There is still a lot of code relying on # bzrlib.bzrdir.format_registry existing. When BzrDir.create/BzrDir.open/etc # get changed to ControlDir.create/ControlDir.open/etc this should be removed. format_registry = controldir.format_registry bzr-2.7.0/bzrlib/cache_utf8.py0000644000000000000000000001060011673635356014326 0ustar 00000000000000# Copyright (C) 2006 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # TODO: Some kind of command-line display of revision properties: # perhaps show them in log -v and allow them as options to the commit command. """Some functions to enable caching the conversion between unicode to utf8""" from __future__ import absolute_import import codecs _utf8_encode = codecs.utf_8_encode _utf8_decode = codecs.utf_8_decode def _utf8_decode_with_None(bytestring, _utf8_decode=_utf8_decode): """wrap _utf8_decode to support None->None for optional strings. Also, only return the Unicode portion, since we don't care about the second return value. """ if bytestring is None: return None else: return _utf8_decode(bytestring)[0] # Map revisions from and to utf8 encoding # Whenever we do an encode/decode operation, we save the result, so that # we don't have to do it again. 
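# Illustrative usage sketch (not part of the original module; the
# revision-id literal below is made up): the encode()/decode() helpers
# defined further down serve repeated conversions out of the two dicts
# that follow.
#
#   from bzrlib import cache_utf8
#   utf8_id = cache_utf8.encode(u'joe@example.com-20060731-deadbeef')
#   # Decoding the same bytes later is a single dict lookup and returns
#   # the cached unicode object instead of re-decoding.
#   unicode_id = cache_utf8.decode(utf8_id)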
_unicode_to_utf8_map = {} _utf8_to_unicode_map = {} def encode(unicode_str, _uni_to_utf8=_unicode_to_utf8_map, _utf8_to_uni=_utf8_to_unicode_map, _utf8_encode=_utf8_encode): """Take this unicode revision id, and get a unicode version""" # If the key is in the cache try/KeyError is 50% faster than # val = dict.get(key), if val is None: # On jam's machine the difference is # try/KeyError: 900ms # if None: 1250ms # Since these are primarily used when iterating over a knit entry # *most* of the time the key will already be in the cache, so use the # fast path try: return _uni_to_utf8[unicode_str] except KeyError: _uni_to_utf8[unicode_str] = utf8_str = _utf8_encode(unicode_str)[0] _utf8_to_uni[utf8_str] = unicode_str return utf8_str def decode(utf8_str, _uni_to_utf8=_unicode_to_utf8_map, _utf8_to_uni=_utf8_to_unicode_map, _utf8_decode=_utf8_decode): """Take a utf8 revision id, and decode it, but cache the result""" try: return _utf8_to_uni[utf8_str] except KeyError: unicode_str = _utf8_decode(utf8_str)[0] _utf8_to_uni[utf8_str] = unicode_str _uni_to_utf8[unicode_str] = utf8_str return unicode_str def get_cached_unicode(unicode_str): """Return a cached version of the unicode string. This has a similar idea to that of intern() in that it tries to return a singleton string. Only it works for unicode strings. """ # This might return the same object, or it might return the cached one # the decode() should just be a hash lookup, because the encode() side # should add the entry to the maps return decode(encode(unicode_str)) def get_cached_utf8(utf8_str): """Return a cached version of the utf-8 string. Get a cached version of this string (similar to intern()). At present, this will be decoded to ensure it is a utf-8 string. In the future this might change to simply caching the string. """ return encode(decode(utf8_str)) def get_cached_ascii(ascii_str, _uni_to_utf8=_unicode_to_utf8_map, _utf8_to_uni=_utf8_to_unicode_map): """This is a string which is identical in utf-8 and unicode.""" # We don't need to do any encoding, but we want _utf8_to_uni to return a # real Unicode string. Unicode and plain strings of this type will have the # same hash, so we can just use it as the key in _uni_to_utf8, but we need # the return value to be different in _utf8_to_uni ascii_str = _uni_to_utf8.setdefault(ascii_str, ascii_str) _utf8_to_uni.setdefault(ascii_str, unicode(ascii_str)) return ascii_str def clear_encoding_cache(): """Clear the encoding and decoding caches""" _unicode_to_utf8_map.clear() _utf8_to_unicode_map.clear() bzr-2.7.0/bzrlib/cethread.py0000644000000000000000000001430511673360271014071 0ustar 00000000000000# Copyright (C) 2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import import sys import threading class CatchingExceptionThread(threading.Thread): """A thread that keeps track of exceptions. 
If an exception occurs during the thread execution, it's caught and re-raised when the thread is joined(). """ def __init__(self, *args, **kwargs): # There are cases where the calling thread must wait, yet, if an # exception occurs, the event should be set so the caller is not # blocked. The main example is a calling thread that want to wait for # the called thread to be in a given state before continuing. try: sync_event = kwargs.pop('sync_event') except KeyError: # If the caller didn't pass a specific event, create our own sync_event = threading.Event() super(CatchingExceptionThread, self).__init__(*args, **kwargs) self.set_sync_event(sync_event) self.exception = None self.ignored_exceptions = None # see set_ignored_exceptions self.lock = threading.Lock() # compatibility thunk for python-2.4 and python-2.5... if sys.version_info < (2, 6): name = property(threading.Thread.getName, threading.Thread.setName) def set_sync_event(self, event): """Set the ``sync_event`` event used to synchronize exception catching. When the thread uses an event to synchronize itself with another thread (setting it when the other thread can wake up from a ``wait`` call), the event must be set after catching an exception or the other thread will hang. Some threads require multiple events and should set the relevant one when appropriate. Note that the event should be initially cleared so the caller can wait() on him and be released when the thread set the event. Also note that the thread can use multiple events, setting them as it progress, while the caller can chose to wait on any of them. What matters is that there is always one event set so that the caller is always released when an exception is caught. Re-using the same event is therefore risky as the thread itself has no idea about which event the caller is waiting on. If the caller has already been released then a cleared event won't guarantee that the caller is still waiting on it. """ self.sync_event = event def switch_and_set(self, new): """Switch to a new ``sync_event`` and set the current one. Using this method protects against race conditions while setting a new ``sync_event``. Note that this allows a caller to wait either on the old or the new event depending on whether it wants a fine control on what is happening inside a thread. :param new: The event that will become ``sync_event`` """ cur = self.sync_event self.lock.acquire() try: # Always release the lock try: self.set_sync_event(new) # From now on, any exception will be synced with the new event except: # Unlucky, we couldn't set the new sync event, try restoring a # safe state self.set_sync_event(cur) raise # Setting the current ``sync_event`` will release callers waiting # on it, note that it will also be set in run() if an exception is # raised cur.set() finally: self.lock.release() def set_ignored_exceptions(self, ignored): """Declare which exceptions will be ignored. 
:param ignored: Can be either: - None: all exceptions will be raised, - an exception class: the instances of this class will be ignored, - a tuple of exception classes: the instances of any class of the list will be ignored, - a callable: that will be passed the exception object and should return True if the exception should be ignored """ if ignored is None: self.ignored_exceptions = None elif isinstance(ignored, (Exception, tuple)): self.ignored_exceptions = lambda e: isinstance(e, ignored) else: self.ignored_exceptions = ignored def run(self): """Overrides Thread.run to capture any exception.""" self.sync_event.clear() try: try: super(CatchingExceptionThread, self).run() except: self.exception = sys.exc_info() finally: # Make sure the calling thread is released self.sync_event.set() def join(self, timeout=None): """Overrides Thread.join to raise any exception caught. Calling join(timeout=0) will raise the caught exception or return None if the thread is still alive. """ super(CatchingExceptionThread, self).join(timeout) if self.exception is not None: exc_class, exc_value, exc_tb = self.exception self.exception = None # The exception should be raised only once if (self.ignored_exceptions is None or not self.ignored_exceptions(exc_value)): # Raise non ignored exceptions raise exc_class, exc_value, exc_tb def pending_exception(self): """Raise the caught exception. This does nothing if no exception occurred. """ self.join(timeout=0) bzr-2.7.0/bzrlib/check.py0000644000000000000000000004606011673635356013403 0ustar 00000000000000# Copyright (C) 2005, 2006 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # TODO: Check ancestries are correct for every revision: includes # every committed so far, and in a reasonable order. # TODO: Also check non-mainline revisions mentioned as parents. # TODO: Check for extra files in the control directory. # TODO: Check revision, inventory and entry objects have all # required fields. # TODO: Get every revision in the revision-store even if they're not # referenced by history and make sure they're all valid. # TODO: Perhaps have a way to record errors other than by raising exceptions; # would perhaps be enough to accumulate exception objects in a list without # raising them. If there's more than one exception it'd be good to see them # all. """Checking of bzr objects. check_refs is a concept used for optimising check. Objects that depend on other objects (e.g. tree on repository) can list the objects they would be requesting so that when the dependent object is checked, matches can be pulled out and evaluated in-line rather than re-reading the same data many times. check_refs are tuples (kind, value). Currently defined kinds are: * 'trees', where value is a revid and the looked up objects are revision trees. 
* 'lefthand-distance', where value is a revid and the looked up objects are the distance along the lefthand path to NULL for that revid. * 'revision-existence', where value is a revid, and the result is True or False indicating that the revision was found/not found. """ from __future__ import absolute_import from bzrlib import ( errors, ui, ) from bzrlib.branch import Branch from bzrlib.controldir import ControlDir from bzrlib.revision import NULL_REVISION from bzrlib.trace import note from bzrlib.workingtree import WorkingTree from bzrlib.i18n import gettext class Check(object): """Check a repository""" def __init__(self, repository, check_repo=True): self.repository = repository def report_results(self, verbose): raise NotImplementedError(self.report_results) class VersionedFileCheck(Check): """Check a versioned file repository""" # The Check object interacts with InventoryEntry.check, etc. def __init__(self, repository, check_repo=True): self.repository = repository self.checked_rev_cnt = 0 self.ghosts = set() self.missing_parent_links = {} self.missing_inventory_sha_cnt = 0 self.missing_revision_cnt = 0 self.checked_weaves = set() self.unreferenced_versions = set() self.inconsistent_parents = [] self.rich_roots = repository.supports_rich_root() self.text_key_references = {} self.check_repo = check_repo self.other_results = [] # Plain text lines to include in the report self._report_items = [] # Keys we are looking for; may be large and need spilling to disk. # key->(type(revision/inventory/text/signature/map), sha1, first-referer) self.pending_keys = {} # Ancestors map for all of revisions being checked; while large helper # functions we call would create it anyway, so better to have once and # keep. self.ancestors = {} def check(self, callback_refs=None, check_repo=True): if callback_refs is None: callback_refs = {} self.repository.lock_read() self.progress = ui.ui_factory.nested_progress_bar() try: self.progress.update(gettext('check'), 0, 4) if self.check_repo: self.progress.update(gettext('checking revisions'), 0) self.check_revisions() self.progress.update(gettext('checking commit contents'), 1) self.repository._check_inventories(self) self.progress.update(gettext('checking file graphs'), 2) # check_weaves is done after the revision scan so that # revision index is known to be valid. self.check_weaves() self.progress.update(gettext('checking branches and trees'), 3) if callback_refs: repo = self.repository # calculate all refs, and callback the objects requesting them. refs = {} wanting_items = set() # Current crude version calculates everything and calls # everything at once. Doing a queue and popping as things are # satisfied would be cheaper on memory [but few people have # huge numbers of working trees today. TODO: fix before # landing]. 
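# Resolve every requested ref in one pass: revision trees are looked up
# directly, while lefthand distances and revision existence are batched
# into sets and answered via the revision graph before the branches and
# working trees are called back with the collected refs.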
distances = set() existences = set() for ref, wantlist in callback_refs.iteritems(): wanting_items.update(wantlist) kind, value = ref if kind == 'trees': refs[ref] = repo.revision_tree(value) elif kind == 'lefthand-distance': distances.add(value) elif kind == 'revision-existence': existences.add(value) else: raise AssertionError( 'unknown ref kind for ref %s' % ref) node_distances = repo.get_graph().find_lefthand_distances(distances) for key, distance in node_distances.iteritems(): refs[('lefthand-distance', key)] = distance if key in existences and distance > 0: refs[('revision-existence', key)] = True existences.remove(key) parent_map = repo.get_graph().get_parent_map(existences) for key in parent_map: refs[('revision-existence', key)] = True existences.remove(key) for key in existences: refs[('revision-existence', key)] = False for item in wanting_items: if isinstance(item, WorkingTree): item._check(refs) if isinstance(item, Branch): self.other_results.append(item.check(refs)) finally: self.progress.finished() self.repository.unlock() def _check_revisions(self, revisions_iterator): """Check revision objects by decorating a generator. :param revisions_iterator: An iterator of(revid, Revision-or-None). :return: A generator of the contents of revisions_iterator. """ self.planned_revisions = set() for revid, revision in revisions_iterator: yield revid, revision self._check_one_rev(revid, revision) # Flatten the revisions we found to guarantee consistent later # iteration. self.planned_revisions = list(self.planned_revisions) # TODO: extract digital signatures as items to callback on too. def check_revisions(self): """Scan revisions, checking data directly available as we go.""" revision_iterator = self.repository._iter_revisions(None) revision_iterator = self._check_revisions(revision_iterator) # We read the all revisions here: # - doing this allows later code to depend on the revision index. # - we can fill out existence flags at this point # - we can read the revision inventory sha at this point # - we can check properties and serialisers etc. if not self.repository._format.revision_graph_can_have_wrong_parents: # The check against the index isn't needed. 
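# Even so, the iterator must be drained so that _check_one_rev() still
# runs for every revision via the _check_revisions() decorator; hence
# the pass loop below.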
self.revs_with_bad_parents_in_index = None for thing in revision_iterator: pass else: bad_revisions = self.repository._find_inconsistent_revision_parents( revision_iterator) self.revs_with_bad_parents_in_index = list(bad_revisions) def report_results(self, verbose): if self.check_repo: self._report_repo_results(verbose) for result in self.other_results: result.report_results(verbose) def _report_repo_results(self, verbose): note(gettext('checked repository {0} format {1}').format( self.repository.user_url, self.repository._format)) note(gettext('%6d revisions'), self.checked_rev_cnt) note(gettext('%6d file-ids'), len(self.checked_weaves)) if verbose: note(gettext('%6d unreferenced text versions'), len(self.unreferenced_versions)) if verbose and len(self.unreferenced_versions): for file_id, revision_id in self.unreferenced_versions: note(gettext('unreferenced version: {{{0}}} in {1}').format(revision_id, file_id)) if self.missing_inventory_sha_cnt: note(gettext('%6d revisions are missing inventory_sha1'), self.missing_inventory_sha_cnt) if self.missing_revision_cnt: note(gettext('%6d revisions are mentioned but not present'), self.missing_revision_cnt) if len(self.ghosts): note(gettext('%6d ghost revisions'), len(self.ghosts)) if verbose: for ghost in self.ghosts: note(' %s', ghost) if len(self.missing_parent_links): note(gettext('%6d revisions missing parents in ancestry'), len(self.missing_parent_links)) if verbose: for link, linkers in self.missing_parent_links.items(): note(gettext(' %s should be in the ancestry for:'), link) for linker in linkers: note(' * %s', linker) if len(self.inconsistent_parents): note(gettext('%6d inconsistent parents'), len(self.inconsistent_parents)) if verbose: for info in self.inconsistent_parents: revision_id, file_id, found_parents, correct_parents = info note(gettext(' * {0} version {1} has parents {2!r} ' 'but should have {3!r}').format( file_id, revision_id, found_parents, correct_parents)) if self.revs_with_bad_parents_in_index: note(gettext( '%6d revisions have incorrect parents in the revision index'), len(self.revs_with_bad_parents_in_index)) if verbose: for item in self.revs_with_bad_parents_in_index: revision_id, index_parents, actual_parents = item note(gettext( ' {0} has wrong parents in index: ' '{1!r} should be {2!r}').format( revision_id, index_parents, actual_parents)) for item in self._report_items: note(item) def _check_one_rev(self, rev_id, rev): """Cross-check one revision. :param rev_id: A revision id to check. :param rev: A revision or None to indicate a missing revision. """ if rev.revision_id != rev_id: self._report_items.append(gettext( 'Mismatched internal revid {{{0}}} and index revid {{{1}}}').format( rev.revision_id, rev_id)) rev_id = rev.revision_id # Check this revision tree etc, and count as seen when we encounter a # reference to it. self.planned_revisions.add(rev_id) # It is not a ghost self.ghosts.discard(rev_id) # Count all parents as ghosts if we haven't seen them yet. for parent in rev.parent_ids: if not parent in self.planned_revisions: self.ghosts.add(parent) self.ancestors[rev_id] = tuple(rev.parent_ids) or (NULL_REVISION,) self.add_pending_item(rev_id, ('inventories', rev_id), 'inventory', rev.inventory_sha1) self.checked_rev_cnt += 1 def add_pending_item(self, referer, key, kind, sha1): """Add a reference to a sha1 to be cross checked against a key. :param referer: The referer that expects key to have sha1. :param key: A storage key e.g. 
('texts', 'foo@bar-20040504-1234') :param kind: revision/inventory/text/map/signature :param sha1: A hex sha1 or None if no sha1 is known. """ existing = self.pending_keys.get(key) if existing: if sha1 != existing[1]: self._report_items.append(gettext('Multiple expected sha1s for {0}. {{{1}}}' ' expects {{{2}}}, {{{3}}} expects {{{4}}}').format( key, referer, sha1, existing[1], existing[0])) else: self.pending_keys[key] = (kind, sha1, referer) def check_weaves(self): """Check all the weaves we can get our hands on. """ weave_ids = [] storebar = ui.ui_factory.nested_progress_bar() try: self._check_weaves(storebar) finally: storebar.finished() def _check_weaves(self, storebar): storebar.update('text-index', 0, 2) if self.repository._format.fast_deltas: # We haven't considered every fileid instance so far. weave_checker = self.repository._get_versioned_file_checker( ancestors=self.ancestors) else: weave_checker = self.repository._get_versioned_file_checker( text_key_references=self.text_key_references, ancestors=self.ancestors) storebar.update('file-graph', 1) result = weave_checker.check_file_version_parents( self.repository.texts) self.checked_weaves = weave_checker.file_ids bad_parents, unused_versions = result bad_parents = bad_parents.items() for text_key, (stored_parents, correct_parents) in bad_parents: # XXX not ready for id join/split operations. weave_id = text_key[0] revision_id = text_key[-1] weave_parents = tuple([parent[-1] for parent in stored_parents]) correct_parents = tuple([parent[-1] for parent in correct_parents]) self.inconsistent_parents.append( (revision_id, weave_id, weave_parents, correct_parents)) self.unreferenced_versions.update(unused_versions) def _add_entry_to_text_key_references(self, inv, entry): if not self.rich_roots and entry.name == '': return key = (entry.file_id, entry.revision) self.text_key_references.setdefault(key, False) if entry.revision == inv.revision_id: self.text_key_references[key] = True def scan_branch(branch, needed_refs, to_unlock): """Scan a branch for refs. :param branch: The branch to schedule for checking. :param needed_refs: Refs we are accumulating. :param to_unlock: The unlock list accumulating. """ note(gettext("Checking branch at '%s'.") % (branch.base,)) branch.lock_read() to_unlock.append(branch) branch_refs = branch._get_check_refs() for ref in branch_refs: reflist = needed_refs.setdefault(ref, []) reflist.append(branch) def scan_tree(base_tree, tree, needed_refs, to_unlock): """Scan a tree for refs. :param base_tree: The original tree check opened, used to detect duplicate tree checks. :param tree: The tree to schedule for checking. :param needed_refs: Refs we are accumulating. :param to_unlock: The unlock list accumulating. """ if base_tree is not None and tree.basedir == base_tree.basedir: return note(gettext("Checking working tree at '%s'.") % (tree.basedir,)) tree.lock_read() to_unlock.append(tree) tree_refs = tree._get_check_refs() for ref in tree_refs: reflist = needed_refs.setdefault(ref, []) reflist.append(tree) def check_dwim(path, verbose, do_branch=False, do_repo=False, do_tree=False): """Check multiple objects. If errors occur they are accumulated and reported as far as possible, and an exception raised at the end of the process. 
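# --- Illustrative sketch (not part of bzrlib): the needed_refs bookkeeping
# that scan_branch()/scan_tree() above feed into repo.check(). Each object
# contributes its _get_check_refs(), and a shared ref is computed once and
# then handed to every object that asked for it. FakeTree/FakeBranch and the
# revision ids are stand-ins invented for this example.
class FakeTree(object):
    def _get_check_refs(self):
        return [('trees', 'rev-1'), ('lefthand-distance', 'rev-1')]

class FakeBranch(object):
    def _get_check_refs(self):
        return [('revision-existence', 'rev-1'), ('lefthand-distance', 'rev-1')]

def accumulate_refs(objects):
    needed_refs = {}
    for obj in objects:
        for ref in obj._get_check_refs():
            needed_refs.setdefault(ref, []).append(obj)
    return needed_refs

needed = accumulate_refs([FakeTree(), FakeBranch()])
for ref, wanters in sorted(needed.items()):
    # ('lefthand-distance', 'rev-1') is wanted by both objects, so the
    # repository computes it once and both receive it via their check hooks.
    print ref, len(wanters)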
""" try: base_tree, branch, repo, relpath = \ ControlDir.open_containing_tree_branch_or_repository(path) except errors.NotBranchError: base_tree = branch = repo = None to_unlock = [] needed_refs= {} try: if base_tree is not None: # If the tree is a lightweight checkout we won't see it in # repo.find_branches - add now. if do_tree: scan_tree(None, base_tree, needed_refs, to_unlock) branch = base_tree.branch if branch is not None: # We have a branch if repo is None: # The branch is in a shared repository repo = branch.repository if repo is not None: repo.lock_read() to_unlock.append(repo) branches = repo.find_branches(using=True) saw_tree = False if do_branch or do_tree: for branch in branches: if do_tree: try: tree = branch.bzrdir.open_workingtree() saw_tree = True except (errors.NotLocalUrl, errors.NoWorkingTree): pass else: scan_tree(base_tree, tree, needed_refs, to_unlock) if do_branch: scan_branch(branch, needed_refs, to_unlock) if do_branch and not branches: note(gettext("No branch found at specified location.")) if do_tree and base_tree is None and not saw_tree: note(gettext("No working tree found at specified location.")) if do_repo or do_branch or do_tree: if do_repo: note(gettext("Checking repository at '%s'.") % (repo.user_url,)) result = repo.check(None, callback_refs=needed_refs, check_repo=do_repo) result.report_results(verbose) else: if do_tree: note(gettext("No working tree found at specified location.")) if do_branch: note(gettext("No branch found at specified location.")) if do_repo: note(gettext("No repository found at specified location.")) finally: for thing in to_unlock: thing.unlock() bzr-2.7.0/bzrlib/chk_map.py0000644000000000000000000022663011673635356013733 0ustar 00000000000000# Copyright (C) 2008-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Persistent maps from tuple_of_strings->string using CHK stores. Overview and current status: The CHKMap class implements a dict from tuple_of_strings->string by using a trie with internal nodes of 8-bit fan out; The key tuples are mapped to strings by joining them by \x00, and \x00 padding shorter keys out to the length of the longest key. Leaf nodes are packed as densely as possible, and internal nodes are all an additional 8-bits wide leading to a sparse upper tree. Updates to a CHKMap are done preferentially via the apply_delta method, to allow optimisation of the update operation; but individual map/unmap calls are possible and supported. Individual changes via map/unmap are buffered in memory until the _save method is called to force serialisation of the tree. apply_delta records its changes immediately by performing an implicit _save. TODO: ----- Densely packed upper nodes. 
""" from __future__ import absolute_import import heapq import threading from bzrlib import lazy_import lazy_import.lazy_import(globals(), """ from bzrlib import ( errors, ) """) from bzrlib import ( errors, lru_cache, osutils, registry, static_tuple, trace, ) from bzrlib.static_tuple import StaticTuple # approx 4MB # If each line is 50 bytes, and you have 255 internal pages, with 255-way fan # out, it takes 3.1MB to cache the layer. _PAGE_CACHE_SIZE = 4*1024*1024 # Per thread caches for 2 reasons: # - in the server we may be serving very different content, so we get less # cache thrashing. # - we avoid locking on every cache lookup. _thread_caches = threading.local() # The page cache. _thread_caches.page_cache = None def _get_cache(): """Get the per-thread page cache. We need a function to do this because in a new thread the _thread_caches threading.local object does not have the cache initialized yet. """ page_cache = getattr(_thread_caches, 'page_cache', None) if page_cache is None: # We are caching bytes so len(value) is perfectly accurate page_cache = lru_cache.LRUSizeCache(_PAGE_CACHE_SIZE) _thread_caches.page_cache = page_cache return page_cache def clear_cache(): _get_cache().clear() # If a ChildNode falls below this many bytes, we check for a remap _INTERESTING_NEW_SIZE = 50 # If a ChildNode shrinks by more than this amount, we check for a remap _INTERESTING_SHRINKAGE_LIMIT = 20 def _search_key_plain(key): """Map the key tuple into a search string that just uses the key bytes.""" return '\x00'.join(key) search_key_registry = registry.Registry() search_key_registry.register('plain', _search_key_plain) class CHKMap(object): """A persistent map from string to string backed by a CHK store.""" __slots__ = ('_store', '_root_node', '_search_key_func') def __init__(self, store, root_key, search_key_func=None): """Create a CHKMap object. :param store: The store the CHKMap is stored in. :param root_key: The root key of the map. None to create an empty CHKMap. :param search_key_func: A function mapping a key => bytes. These bytes are then used by the internal nodes to split up leaf nodes into multiple pages. """ self._store = store if search_key_func is None: search_key_func = _search_key_plain self._search_key_func = search_key_func if root_key is None: self._root_node = LeafNode(search_key_func=search_key_func) else: self._root_node = self._node_key(root_key) def apply_delta(self, delta): """Apply a delta to the map. :param delta: An iterable of old_key, new_key, new_value tuples. If new_key is not None, then new_key->new_value is inserted into the map; if old_key is not None, then the old mapping of old_key is removed. """ has_deletes = False # Check preconditions first. as_st = StaticTuple.from_sequence new_items = set([as_st(key) for (old, key, value) in delta if key is not None and old is None]) existing_new = list(self.iteritems(key_filter=new_items)) if existing_new: raise errors.InconsistentDeltaDelta(delta, "New items are already in the map %r." % existing_new) # Now apply changes. for old, new, value in delta: if old is not None and old != new: self.unmap(old, check_remap=False) has_deletes = True for old, new, value in delta: if new is not None: self.map(new, value) if has_deletes: self._check_remap() return self._save() def _ensure_root(self): """Ensure that the root node is an object not a key.""" if type(self._root_node) is StaticTuple: # Demand-load the root self._root_node = self._get_node(self._root_node) def _get_node(self, node): """Get a node. 
Note that this does not update the _items dict in objects containing a reference to this node. As such it does not prevent subsequent IO being performed. :param node: A tuple key or node object. :return: A node object. """ if type(node) is StaticTuple: bytes = self._read_bytes(node) return _deserialise(bytes, node, search_key_func=self._search_key_func) else: return node def _read_bytes(self, key): try: return _get_cache()[key] except KeyError: stream = self._store.get_record_stream([key], 'unordered', True) bytes = stream.next().get_bytes_as('fulltext') _get_cache()[key] = bytes return bytes def _dump_tree(self, include_keys=False): """Return the tree in a string representation.""" self._ensure_root() res = self._dump_tree_node(self._root_node, prefix='', indent='', include_keys=include_keys) res.append('') # Give a trailing '\n' return '\n'.join(res) def _dump_tree_node(self, node, prefix, indent, include_keys=True): """For this node and all children, generate a string representation.""" result = [] if not include_keys: key_str = '' else: node_key = node.key() if node_key is not None: key_str = ' %s' % (node_key[0],) else: key_str = ' None' result.append('%s%r %s%s' % (indent, prefix, node.__class__.__name__, key_str)) if type(node) is InternalNode: # Trigger all child nodes to get loaded list(node._iter_nodes(self._store)) for prefix, sub in sorted(node._items.iteritems()): result.extend(self._dump_tree_node(sub, prefix, indent + ' ', include_keys=include_keys)) else: for key, value in sorted(node._items.iteritems()): # Don't use prefix nor indent here to line up when used in # tests in conjunction with assertEqualDiff result.append(' %r %r' % (tuple(key), value)) return result @classmethod def from_dict(klass, store, initial_value, maximum_size=0, key_width=1, search_key_func=None): """Create a CHKMap in store with initial_value as the content. :param store: The store to record initial_value in, a VersionedFiles object with 1-tuple keys supporting CHK key generation. :param initial_value: A dict to store in store. Its keys and values must be bytestrings. :param maximum_size: The maximum_size rule to apply to nodes. This determines the size at which no new data is added to a single node. :param key_width: The number of elements in each key_tuple being stored in this map. :param search_key_func: A function mapping a key => bytes. These bytes are then used by the internal nodes to split up leaf nodes into multiple pages. :return: The root chk of the resulting CHKMap. 
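# --- Illustrative sketch (not part of bzrlib): the per-thread page cache and
# read-through behaviour of _get_cache()/_read_bytes() above. The real cache
# is an LRUSizeCache capped at roughly 4MB and the store is a VersionedFiles;
# here a plain dict stands in for both to keep the example self-contained.
import threading

_example_caches = threading.local()

def example_get_cache():
    # Lazily create one cache per thread, because a new thread starts with an
    # empty threading.local().
    cache = getattr(_example_caches, 'page_cache', None)
    if cache is None:
        cache = {}
        _example_caches.page_cache = cache
    return cache

def example_read_bytes(store, key):
    # Serve from the page cache when possible, otherwise fetch from the store
    # and remember the answer for next time.
    cache = example_get_cache()
    try:
        return cache[key]
    except KeyError:
        bytes = store[key]  # stands in for get_record_stream(...).get_bytes_as()
        cache[key] = bytes
        return bytes

fake_store = {('sha1:abcd',): 'chkleaf:\n...'}
print example_read_bytes(fake_store, ('sha1:abcd',))
print example_read_bytes(fake_store, ('sha1:abcd',))  # second call hits the cache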
""" root_key = klass._create_directly(store, initial_value, maximum_size=maximum_size, key_width=key_width, search_key_func=search_key_func) if type(root_key) is not StaticTuple: raise AssertionError('we got a %s instead of a StaticTuple' % (type(root_key),)) return root_key @classmethod def _create_via_map(klass, store, initial_value, maximum_size=0, key_width=1, search_key_func=None): result = klass(store, None, search_key_func=search_key_func) result._root_node.set_maximum_size(maximum_size) result._root_node._key_width = key_width delta = [] for key, value in initial_value.items(): delta.append((None, key, value)) root_key = result.apply_delta(delta) return root_key @classmethod def _create_directly(klass, store, initial_value, maximum_size=0, key_width=1, search_key_func=None): node = LeafNode(search_key_func=search_key_func) node.set_maximum_size(maximum_size) node._key_width = key_width as_st = StaticTuple.from_sequence node._items = dict([(as_st(key), val) for key, val in initial_value.iteritems()]) node._raw_size = sum([node._key_value_len(key, value) for key,value in node._items.iteritems()]) node._len = len(node._items) node._compute_search_prefix() node._compute_serialised_prefix() if (node._len > 1 and maximum_size and node._current_size() > maximum_size): prefix, node_details = node._split(store) if len(node_details) == 1: raise AssertionError('Failed to split using node._split') node = InternalNode(prefix, search_key_func=search_key_func) node.set_maximum_size(maximum_size) node._key_width = key_width for split, subnode in node_details: node.add_node(split, subnode) keys = list(node.serialise(store)) return keys[-1] def iter_changes(self, basis): """Iterate over the changes between basis and self. :return: An iterator of tuples: (key, old_value, new_value). Old_value is None for keys only in self; new_value is None for keys only in basis. """ # Overview: # Read both trees in lexographic, highest-first order. # Any identical nodes we skip # Any unique prefixes we output immediately. # values in a leaf node are treated as single-value nodes in the tree # which allows them to be not-special-cased. We know to output them # because their value is a string, not a key(tuple) or node. # # corner cases to beware of when considering this function: # *) common references are at different heights. # consider two trees: # {'a': LeafNode={'aaa':'foo', 'aab':'bar'}, 'b': LeafNode={'b'}} # {'a': InternalNode={'aa':LeafNode={'aaa':'foo', 'aab':'bar'}, # 'ab':LeafNode={'ab':'bar'}} # 'b': LeafNode={'b'}} # the node with aaa/aab will only be encountered in the second tree # after reading the 'a' subtree, but it is encountered in the first # tree immediately. Variations on this may have read internal nodes # like this. we want to cut the entire pending subtree when we # realise we have a common node. For this we use a list of keys - # the path to a node - and check the entire path is clean as we # process each item. if self._node_key(self._root_node) == self._node_key(basis._root_node): return self._ensure_root() basis._ensure_root() excluded_keys = set() self_node = self._root_node basis_node = basis._root_node # A heap, each element is prefix, node(tuple/NodeObject/string), # key_path (a list of tuples, tail-sharing down the tree.) 
self_pending = [] basis_pending = [] def process_node(node, path, a_map, pending): # take a node and expand it node = a_map._get_node(node) if type(node) == LeafNode: path = (node._key, path) for key, value in node._items.items(): # For a LeafNode, the key is a serialized_key, rather than # a search_key, but the heap is using search_keys search_key = node._search_key_func(key) heapq.heappush(pending, (search_key, key, value, path)) else: # type(node) == InternalNode path = (node._key, path) for prefix, child in node._items.items(): heapq.heappush(pending, (prefix, None, child, path)) def process_common_internal_nodes(self_node, basis_node): self_items = set(self_node._items.items()) basis_items = set(basis_node._items.items()) path = (self_node._key, None) for prefix, child in self_items - basis_items: heapq.heappush(self_pending, (prefix, None, child, path)) path = (basis_node._key, None) for prefix, child in basis_items - self_items: heapq.heappush(basis_pending, (prefix, None, child, path)) def process_common_leaf_nodes(self_node, basis_node): self_items = set(self_node._items.items()) basis_items = set(basis_node._items.items()) path = (self_node._key, None) for key, value in self_items - basis_items: prefix = self._search_key_func(key) heapq.heappush(self_pending, (prefix, key, value, path)) path = (basis_node._key, None) for key, value in basis_items - self_items: prefix = basis._search_key_func(key) heapq.heappush(basis_pending, (prefix, key, value, path)) def process_common_prefix_nodes(self_node, self_path, basis_node, basis_path): # Would it be more efficient if we could request both at the same # time? self_node = self._get_node(self_node) basis_node = basis._get_node(basis_node) if (type(self_node) == InternalNode and type(basis_node) == InternalNode): # Matching internal nodes process_common_internal_nodes(self_node, basis_node) elif (type(self_node) == LeafNode and type(basis_node) == LeafNode): process_common_leaf_nodes(self_node, basis_node) else: process_node(self_node, self_path, self, self_pending) process_node(basis_node, basis_path, basis, basis_pending) process_common_prefix_nodes(self_node, None, basis_node, None) self_seen = set() basis_seen = set() excluded_keys = set() def check_excluded(key_path): # Note that this is N^2, it depends on us trimming trees # aggressively to not become slow. # A better implementation would probably have a reverse map # back to the children of a node, and jump straight to it when # a common node is detected, the proceed to remove the already # pending children. bzrlib.graph has a searcher module with a # similar problem. while key_path is not None: key, key_path = key_path if key in excluded_keys: return True return False loop_counter = 0 while self_pending or basis_pending: loop_counter += 1 if not self_pending: # self is exhausted: output remainder of basis for prefix, key, node, path in basis_pending: if check_excluded(path): continue node = basis._get_node(node) if key is not None: # a value yield (key, node, None) else: # subtree - fastpath the entire thing. for key, value in node.iteritems(basis._store): yield (key, value, None) return elif not basis_pending: # basis is exhausted: output remainder of self. for prefix, key, node, path in self_pending: if check_excluded(path): continue node = self._get_node(node) if key is not None: # a value yield (key, None, node) else: # subtree - fastpath the entire thing. 
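# --- Illustrative sketch (not part of bzrlib): the tail-sharing key_path
# representation used by the pending heaps above. Each entry carries a
# (node_key, parent_path) pair, so trimming a whole subtree only requires
# adding that subtree root's key to excluded_keys. Keys here are invented.
def example_check_excluded(key_path, excluded_keys):
    # Walk the linked path back toward the root; an excluded ancestor means
    # this pending entry belongs to a subtree that was already cut.
    while key_path is not None:
        key, key_path = key_path
        if key in excluded_keys:
            return True
    return False

root_path = (('sha1:root',), None)
child_path = (('sha1:child',), root_path)
leaf_path = (('sha1:leaf',), child_path)
excluded = set([('sha1:child',)])
print example_check_excluded(leaf_path, excluded)   # True: ancestor excluded
print example_check_excluded(root_path, excluded)   # False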
for key, value in node.iteritems(self._store): yield (key, None, value) return else: # XXX: future optimisation - yield the smaller items # immediately rather than pushing everything on/off the # heaps. Applies to both internal nodes and leafnodes. if self_pending[0][0] < basis_pending[0][0]: # expand self prefix, key, node, path = heapq.heappop(self_pending) if check_excluded(path): continue if key is not None: # a value yield (key, None, node) else: process_node(node, path, self, self_pending) continue elif self_pending[0][0] > basis_pending[0][0]: # expand basis prefix, key, node, path = heapq.heappop(basis_pending) if check_excluded(path): continue if key is not None: # a value yield (key, node, None) else: process_node(node, path, basis, basis_pending) continue else: # common prefix: possibly expand both if self_pending[0][1] is None: # process next self read_self = True else: read_self = False if basis_pending[0][1] is None: # process next basis read_basis = True else: read_basis = False if not read_self and not read_basis: # compare a common value self_details = heapq.heappop(self_pending) basis_details = heapq.heappop(basis_pending) if self_details[2] != basis_details[2]: yield (self_details[1], basis_details[2], self_details[2]) continue # At least one side wasn't a simple value if (self._node_key(self_pending[0][2]) == self._node_key(basis_pending[0][2])): # Identical pointers, skip (and don't bother adding to # excluded, it won't turn up again. heapq.heappop(self_pending) heapq.heappop(basis_pending) continue # Now we need to expand this node before we can continue if read_self and read_basis: # Both sides start with the same prefix, so process # them in parallel self_prefix, _, self_node, self_path = heapq.heappop( self_pending) basis_prefix, _, basis_node, basis_path = heapq.heappop( basis_pending) if self_prefix != basis_prefix: raise AssertionError( '%r != %r' % (self_prefix, basis_prefix)) process_common_prefix_nodes( self_node, self_path, basis_node, basis_path) continue if read_self: prefix, key, node, path = heapq.heappop(self_pending) if check_excluded(path): continue process_node(node, path, self, self_pending) if read_basis: prefix, key, node, path = heapq.heappop(basis_pending) if check_excluded(path): continue process_node(node, path, basis, basis_pending) # print loop_counter def iteritems(self, key_filter=None): """Iterate over the entire CHKMap's contents.""" self._ensure_root() if key_filter is not None: as_st = StaticTuple.from_sequence key_filter = [as_st(key) for key in key_filter] return self._root_node.iteritems(self._store, key_filter=key_filter) def key(self): """Return the key for this map.""" if type(self._root_node) is StaticTuple: return self._root_node else: return self._root_node._key def __len__(self): self._ensure_root() return len(self._root_node) def map(self, key, value): """Map a key tuple to value. :param key: A key to map. :param value: The value to assign to key. """ key = StaticTuple.from_sequence(key) # Need a root object. 
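# --- Illustrative sketch (not part of bzrlib): a much-simplified model of the
# iter_changes() walk above. Both sides are merged in ascending key order via
# heapq and (key, old_value, new_value) tuples are emitted; the real code also
# pushes unexpanded subtree nodes onto the same heaps and skips identical
# pointers. The dicts below are invented flat stand-ins for two CHKMaps.
import heapq

def example_diff(basis_items, self_items):
    basis_heap = basis_items.items()
    self_heap = self_items.items()
    heapq.heapify(basis_heap)
    heapq.heapify(self_heap)
    while basis_heap or self_heap:
        if not basis_heap:
            key, value = heapq.heappop(self_heap)
            yield key, None, value              # only in self
        elif not self_heap:
            key, value = heapq.heappop(basis_heap)
            yield key, value, None              # only in basis
        elif self_heap[0][0] < basis_heap[0][0]:
            key, value = heapq.heappop(self_heap)
            yield key, None, value
        elif self_heap[0][0] > basis_heap[0][0]:
            key, value = heapq.heappop(basis_heap)
            yield key, value, None
        else:
            key, old = heapq.heappop(basis_heap)
            _, new = heapq.heappop(self_heap)
            if old != new:
                yield key, old, new             # same key, changed value

print list(example_diff({'a': '1', 'b': '2'}, {'b': '3', 'c': '4'}))
# [('a', '1', None), ('b', '2', '3'), ('c', None, '4')]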
self._ensure_root() prefix, node_details = self._root_node.map(self._store, key, value) if len(node_details) == 1: self._root_node = node_details[0][1] else: self._root_node = InternalNode(prefix, search_key_func=self._search_key_func) self._root_node.set_maximum_size(node_details[0][1].maximum_size) self._root_node._key_width = node_details[0][1]._key_width for split, node in node_details: self._root_node.add_node(split, node) def _node_key(self, node): """Get the key for a node whether it's a tuple or node.""" if type(node) is tuple: node = StaticTuple.from_sequence(node) if type(node) is StaticTuple: return node else: return node._key def unmap(self, key, check_remap=True): """remove key from the map.""" key = StaticTuple.from_sequence(key) self._ensure_root() if type(self._root_node) is InternalNode: unmapped = self._root_node.unmap(self._store, key, check_remap=check_remap) else: unmapped = self._root_node.unmap(self._store, key) self._root_node = unmapped def _check_remap(self): """Check if nodes can be collapsed.""" self._ensure_root() if type(self._root_node) is InternalNode: self._root_node = self._root_node._check_remap(self._store) def _save(self): """Save the map completely. :return: The key of the root node. """ if type(self._root_node) is StaticTuple: # Already saved. return self._root_node keys = list(self._root_node.serialise(self._store)) return keys[-1] class Node(object): """Base class defining the protocol for CHK Map nodes. :ivar _raw_size: The total size of the serialized key:value data, before adding the header bytes, and without prefix compression. """ __slots__ = ('_key', '_len', '_maximum_size', '_key_width', '_raw_size', '_items', '_search_prefix', '_search_key_func' ) def __init__(self, key_width=1): """Create a node. :param key_width: The width of keys for this node. """ self._key = None # Current number of elements self._len = 0 self._maximum_size = 0 self._key_width = key_width # current size in bytes self._raw_size = 0 # The pointers/values this node has - meaning defined by child classes. self._items = {} # The common search prefix self._search_prefix = None def __repr__(self): items_str = str(sorted(self._items)) if len(items_str) > 20: items_str = items_str[:16] + '...]' return '%s(key:%s len:%s size:%s max:%s prefix:%s items:%s)' % ( self.__class__.__name__, self._key, self._len, self._raw_size, self._maximum_size, self._search_prefix, items_str) def key(self): return self._key def __len__(self): return self._len @property def maximum_size(self): """What is the upper limit for adding references to a node.""" return self._maximum_size def set_maximum_size(self, new_size): """Set the size threshold for nodes. :param new_size: The size at which no data is added to a node. 0 for unlimited. """ self._maximum_size = new_size @classmethod def common_prefix(cls, prefix, key): """Given 2 strings, return the longest prefix common to both. :param prefix: This has been the common prefix for other keys, so it is more likely to be the common prefix in this case as well. :param key: Another string to compare to """ if key.startswith(prefix): return prefix pos = -1 # Is there a better way to do this? for pos, (left, right) in enumerate(zip(prefix, key)): if left != right: pos -= 1 break common = prefix[:pos+1] return common @classmethod def common_prefix_for_keys(cls, keys): """Given a list of keys, find their common prefix. :param keys: An iterable of strings. :return: The longest common prefix of all keys. 
""" common_prefix = None for key in keys: if common_prefix is None: common_prefix = key continue common_prefix = cls.common_prefix(common_prefix, key) if not common_prefix: # if common_prefix is the empty string, then we know it won't # change further return '' return common_prefix # Singleton indicating we have not computed _search_prefix yet _unknown = object() class LeafNode(Node): """A node containing actual key:value pairs. :ivar _items: A dict of key->value items. The key is in tuple form. :ivar _size: The number of bytes that would be used by serializing all of the key/value pairs. """ __slots__ = ('_common_serialised_prefix',) def __init__(self, search_key_func=None): Node.__init__(self) # All of the keys in this leaf node share this common prefix self._common_serialised_prefix = None if search_key_func is None: self._search_key_func = _search_key_plain else: self._search_key_func = search_key_func def __repr__(self): items_str = str(sorted(self._items)) if len(items_str) > 20: items_str = items_str[:16] + '...]' return \ '%s(key:%s len:%s size:%s max:%s prefix:%s keywidth:%s items:%s)' \ % (self.__class__.__name__, self._key, self._len, self._raw_size, self._maximum_size, self._search_prefix, self._key_width, items_str) def _current_size(self): """Answer the current serialised size of this node. This differs from self._raw_size in that it includes the bytes used for the header. """ if self._common_serialised_prefix is None: bytes_for_items = 0 prefix_len = 0 else: # We will store a single string with the common prefix # And then that common prefix will not be stored in any of the # entry lines prefix_len = len(self._common_serialised_prefix) bytes_for_items = (self._raw_size - (prefix_len * self._len)) return (9 # 'chkleaf:\n' + len(str(self._maximum_size)) + 1 + len(str(self._key_width)) + 1 + len(str(self._len)) + 1 + prefix_len + 1 + bytes_for_items) @classmethod def deserialise(klass, bytes, key, search_key_func=None): """Deserialise bytes, with key key, into a LeafNode. :param bytes: The bytes of the node. :param key: The key that the serialised node has. """ key = static_tuple.expect_static_tuple(key) return _deserialise_leaf_node(bytes, key, search_key_func=search_key_func) def iteritems(self, store, key_filter=None): """Iterate over items in the node. :param key_filter: A filter to apply to the node. It should be a list/set/dict or similar repeatedly iterable container. """ if key_filter is not None: # Adjust the filter - short elements go to a prefix filter. All # other items are looked up directly. # XXX: perhaps defaultdict? Profiling filters = {} for key in key_filter: if len(key) == self._key_width: # This filter is meant to match exactly one key, yield it # if we have it. 
try: yield key, self._items[key] except KeyError: # This key is not present in this map, continue pass else: # Short items, we need to match based on a prefix length_filter = filters.setdefault(len(key), set()) length_filter.add(key) if filters: filters = filters.items() for item in self._items.iteritems(): for length, length_filter in filters: if item[0][:length] in length_filter: yield item break else: for item in self._items.iteritems(): yield item def _key_value_len(self, key, value): # TODO: Should probably be done without actually joining the key, but # then that can be done via the C extension return (len(self._serialise_key(key)) + 1 + len(str(value.count('\n'))) + 1 + len(value) + 1) def _search_key(self, key): return self._search_key_func(key) def _map_no_split(self, key, value): """Map a key to a value. This assumes either the key does not already exist, or you have already removed its size and length from self. :return: True if adding this node should cause us to split. """ self._items[key] = value self._raw_size += self._key_value_len(key, value) self._len += 1 serialised_key = self._serialise_key(key) if self._common_serialised_prefix is None: self._common_serialised_prefix = serialised_key else: self._common_serialised_prefix = self.common_prefix( self._common_serialised_prefix, serialised_key) search_key = self._search_key(key) if self._search_prefix is _unknown: self._compute_search_prefix() if self._search_prefix is None: self._search_prefix = search_key else: self._search_prefix = self.common_prefix( self._search_prefix, search_key) if (self._len > 1 and self._maximum_size and self._current_size() > self._maximum_size): # Check to see if all of the search_keys for this node are # identical. We allow the node to grow under that circumstance # (we could track this as common state, but it is infrequent) if (search_key != self._search_prefix or not self._are_search_keys_identical()): return True return False def _split(self, store): """We have overflowed. Split this node into multiple LeafNodes, return it up the stack so that the next layer creates a new InternalNode and references the new nodes. :return: (common_serialised_prefix, [(node_serialised_prefix, node)]) """ if self._search_prefix is _unknown: raise AssertionError('Search prefix must be known') common_prefix = self._search_prefix split_at = len(common_prefix) + 1 result = {} for key, value in self._items.iteritems(): search_key = self._search_key(key) prefix = search_key[:split_at] # TODO: Generally only 1 key can be exactly the right length, # which means we can only have 1 key in the node pointed # at by the 'prefix\0' key. We might want to consider # folding it into the containing InternalNode rather than # having a fixed length-1 node. # Note this is probably not true for hash keys, as they # may get a '\00' node anywhere, but won't have keys of # different lengths. 
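# --- Illustrative sketch (not part of bzrlib): the prefix bucketing performed
# by LeafNode._split() here. Every item goes into a bucket named by a fixed
# width slice of its search key, one byte wider than the node's common search
# prefix; a search key shorter than that slice gets padded with NUL bytes, as
# the real code does just below. Keys and values are invented.
def example_split_buckets(items, search_key, common_prefix_len):
    split_at = common_prefix_len + 1
    buckets = {}
    for key, value in items.items():
        prefix = search_key(key)[:split_at]
        if len(prefix) < split_at:
            prefix += '\x00' * (split_at - len(prefix))
        buckets.setdefault(prefix, {})[key] = value
    return buckets

plain = lambda key: '\x00'.join(key)
items = {('ab',): 'x', ('abc',): 'y', ('abd',): 'z'}
# The common search prefix of 'ab', 'abc' and 'abd' is 'ab', so buckets are
# three bytes wide and the short key ('ab',) is padded to 'ab\x00'.
for prefix, bucket in sorted(example_split_buckets(items, plain, 2).items()):
    print repr(prefix), sorted(bucket)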
if len(prefix) < split_at: prefix += '\x00'*(split_at - len(prefix)) if prefix not in result: node = LeafNode(search_key_func=self._search_key_func) node.set_maximum_size(self._maximum_size) node._key_width = self._key_width result[prefix] = node else: node = result[prefix] sub_prefix, node_details = node.map(store, key, value) if len(node_details) > 1: if prefix != sub_prefix: # This node has been split and is now found via a different # path result.pop(prefix) new_node = InternalNode(sub_prefix, search_key_func=self._search_key_func) new_node.set_maximum_size(self._maximum_size) new_node._key_width = self._key_width for split, node in node_details: new_node.add_node(split, node) result[prefix] = new_node return common_prefix, result.items() def map(self, store, key, value): """Map key to value.""" if key in self._items: self._raw_size -= self._key_value_len(key, self._items[key]) self._len -= 1 self._key = None if self._map_no_split(key, value): return self._split(store) else: if self._search_prefix is _unknown: raise AssertionError('%r must be known' % self._search_prefix) return self._search_prefix, [("", self)] _serialise_key = '\x00'.join def serialise(self, store): """Serialise the LeafNode to store. :param store: A VersionedFiles honouring the CHK extensions. :return: An iterable of the keys inserted by this operation. """ lines = ["chkleaf:\n"] lines.append("%d\n" % self._maximum_size) lines.append("%d\n" % self._key_width) lines.append("%d\n" % self._len) if self._common_serialised_prefix is None: lines.append('\n') if len(self._items) != 0: raise AssertionError('If _common_serialised_prefix is None' ' we should have no items') else: lines.append('%s\n' % (self._common_serialised_prefix,)) prefix_len = len(self._common_serialised_prefix) for key, value in sorted(self._items.items()): # Always add a final newline value_lines = osutils.chunks_to_lines([value + '\n']) serialized = "%s\x00%s\n" % (self._serialise_key(key), len(value_lines)) if not serialized.startswith(self._common_serialised_prefix): raise AssertionError('We thought the common prefix was %r' ' but entry %r does not have it in common' % (self._common_serialised_prefix, serialized)) lines.append(serialized[prefix_len:]) lines.extend(value_lines) sha1, _, _ = store.add_lines((None,), (), lines) self._key = StaticTuple("sha1:" + sha1,).intern() bytes = ''.join(lines) if len(bytes) != self._current_size(): raise AssertionError('Invalid _current_size') _get_cache()[self._key] = bytes return [self._key] def refs(self): """Return the references to other CHK's held by this node.""" return [] def _compute_search_prefix(self): """Determine the common search prefix for all keys in this node. :return: A bytestring of the longest search key prefix that is unique within this node. """ search_keys = [self._search_key_func(key) for key in self._items] self._search_prefix = self.common_prefix_for_keys(search_keys) return self._search_prefix def _are_search_keys_identical(self): """Check to see if the search keys for all entries are the same. When using a hash as the search_key it is possible for non-identical keys to collide. If that happens enough, we may try overflow a LeafNode, but as all are collisions, we must not split. 
""" common_search_key = None for key in self._items: search_key = self._search_key(key) if common_search_key is None: common_search_key = search_key elif search_key != common_search_key: return False return True def _compute_serialised_prefix(self): """Determine the common prefix for serialised keys in this node. :return: A bytestring of the longest serialised key prefix that is unique within this node. """ serialised_keys = [self._serialise_key(key) for key in self._items] self._common_serialised_prefix = self.common_prefix_for_keys( serialised_keys) return self._common_serialised_prefix def unmap(self, store, key): """Unmap key from the node.""" try: self._raw_size -= self._key_value_len(key, self._items[key]) except KeyError: trace.mutter("key %s not found in %r", key, self._items) raise self._len -= 1 del self._items[key] self._key = None # Recompute from scratch self._compute_search_prefix() self._compute_serialised_prefix() return self class InternalNode(Node): """A node that contains references to other nodes. An InternalNode is responsible for mapping search key prefixes to child nodes. :ivar _items: serialised_key => node dictionary. node may be a tuple, LeafNode or InternalNode. """ __slots__ = ('_node_width',) def __init__(self, prefix='', search_key_func=None): Node.__init__(self) # The size of an internalnode with default values and no children. # How many octets key prefixes within this node are. self._node_width = 0 self._search_prefix = prefix if search_key_func is None: self._search_key_func = _search_key_plain else: self._search_key_func = search_key_func def add_node(self, prefix, node): """Add a child node with prefix prefix, and node node. :param prefix: The search key prefix for node. :param node: The node being added. """ if self._search_prefix is None: raise AssertionError("_search_prefix should not be None") if not prefix.startswith(self._search_prefix): raise AssertionError("prefixes mismatch: %s must start with %s" % (prefix,self._search_prefix)) if len(prefix) != len(self._search_prefix) + 1: raise AssertionError("prefix wrong length: len(%s) is not %d" % (prefix, len(self._search_prefix) + 1)) self._len += len(node) if not len(self._items): self._node_width = len(prefix) if self._node_width != len(self._search_prefix) + 1: raise AssertionError("node width mismatch: %d is not %d" % (self._node_width, len(self._search_prefix) + 1)) self._items[prefix] = node self._key = None def _current_size(self): """Answer the current serialised size of this node.""" return (self._raw_size + len(str(self._len)) + len(str(self._key_width)) + len(str(self._maximum_size))) @classmethod def deserialise(klass, bytes, key, search_key_func=None): """Deserialise bytes to an InternalNode, with key key. :param bytes: The bytes of the node. :param key: The key that the serialised node has. :return: An InternalNode instance. """ key = static_tuple.expect_static_tuple(key) return _deserialise_internal_node(bytes, key, search_key_func=search_key_func) def iteritems(self, store, key_filter=None): for node, node_filter in self._iter_nodes(store, key_filter=key_filter): for item in node.iteritems(store, key_filter=node_filter): yield item def _iter_nodes(self, store, key_filter=None, batch_size=None): """Iterate over node objects which match key_filter. :param store: A store to use for accessing content. :param key_filter: A key filter to filter nodes. Only nodes that might contain a key in key_filter will be returned. 
:param batch_size: If not None, then we will return the nodes that had to be read using get_record_stream in batches, rather than reading them all at once. :return: An iterable of nodes. This function does not have to be fully consumed. (There will be no pending I/O when items are being returned.) """ # Map from chk key ('sha1:...',) to (prefix, key_filter) # prefix is the key in self._items to use, key_filter is the key_filter # entries that would match this node keys = {} shortcut = False if key_filter is None: # yielding all nodes, yield whatever we have, and queue up a read # for whatever we are missing shortcut = True for prefix, node in self._items.iteritems(): if node.__class__ is StaticTuple: keys[node] = (prefix, None) else: yield node, None elif len(key_filter) == 1: # Technically, this path could also be handled by the first check # in 'self._node_width' in length_filters. However, we can handle # this case without spending any time building up the # prefix_to_keys, etc state. # This is a bit ugly, but TIMEIT showed it to be by far the fastest # 0.626us list(key_filter)[0] # is a func() for list(), 2 mallocs, and a getitem # 0.489us [k for k in key_filter][0] # still has the mallocs, avoids the func() call # 0.350us iter(key_filter).next() # has a func() call, and mallocs an iterator # 0.125us for key in key_filter: pass # no func() overhead, might malloc an iterator # 0.105us for key in key_filter: break # no func() overhead, might malloc an iterator, probably # avoids checking an 'else' clause as part of the for for key in key_filter: break search_prefix = self._search_prefix_filter(key) if len(search_prefix) == self._node_width: # This item will match exactly, so just do a dict lookup, and # see what we can return shortcut = True try: node = self._items[search_prefix] except KeyError: # A given key can only match 1 child node, if it isn't # there, then we can just return nothing return if node.__class__ is StaticTuple: keys[node] = (search_prefix, [key]) else: # This is loaded, and the only thing that can match, # return yield node, [key] return if not shortcut: # First, convert all keys into a list of search prefixes # Aggregate common prefixes, and track the keys they come from prefix_to_keys = {} length_filters = {} for key in key_filter: search_prefix = self._search_prefix_filter(key) length_filter = length_filters.setdefault( len(search_prefix), set()) length_filter.add(search_prefix) prefix_to_keys.setdefault(search_prefix, []).append(key) if (self._node_width in length_filters and len(length_filters) == 1): # all of the search prefixes match exactly _node_width. This # means that everything is an exact match, and we can do a # lookup into self._items, rather than iterating over the items # dict. search_prefixes = length_filters[self._node_width] for search_prefix in search_prefixes: try: node = self._items[search_prefix] except KeyError: # We can ignore this one continue node_key_filter = prefix_to_keys[search_prefix] if node.__class__ is StaticTuple: keys[node] = (search_prefix, node_key_filter) else: yield node, node_key_filter else: # The slow way. 
We walk every item in self._items, and check to # see if there are any matches length_filters = length_filters.items() for prefix, node in self._items.iteritems(): node_key_filter = [] for length, length_filter in length_filters: sub_prefix = prefix[:length] if sub_prefix in length_filter: node_key_filter.extend(prefix_to_keys[sub_prefix]) if node_key_filter: # this key matched something, yield it if node.__class__ is StaticTuple: keys[node] = (prefix, node_key_filter) else: yield node, node_key_filter if keys: # Look in the page cache for some more bytes found_keys = set() for key in keys: try: bytes = _get_cache()[key] except KeyError: continue else: node = _deserialise(bytes, key, search_key_func=self._search_key_func) prefix, node_key_filter = keys[key] self._items[prefix] = node found_keys.add(key) yield node, node_key_filter for key in found_keys: del keys[key] if keys: # demand load some pages. if batch_size is None: # Read all the keys in batch_size = len(keys) key_order = list(keys) for batch_start in range(0, len(key_order), batch_size): batch = key_order[batch_start:batch_start + batch_size] # We have to fully consume the stream so there is no pending # I/O, so we buffer the nodes for now. stream = store.get_record_stream(batch, 'unordered', True) node_and_filters = [] for record in stream: bytes = record.get_bytes_as('fulltext') node = _deserialise(bytes, record.key, search_key_func=self._search_key_func) prefix, node_key_filter = keys[record.key] node_and_filters.append((node, node_key_filter)) self._items[prefix] = node _get_cache()[record.key] = bytes for info in node_and_filters: yield info def map(self, store, key, value): """Map key to value.""" if not len(self._items): raise AssertionError("can't map in an empty InternalNode.") search_key = self._search_key(key) if self._node_width != len(self._search_prefix) + 1: raise AssertionError("node width mismatch: %d is not %d" % (self._node_width, len(self._search_prefix) + 1)) if not search_key.startswith(self._search_prefix): # This key doesn't fit in this index, so we need to split at the # point where it would fit, insert self into that internal node, # and then map this key into that node. new_prefix = self.common_prefix(self._search_prefix, search_key) new_parent = InternalNode(new_prefix, search_key_func=self._search_key_func) new_parent.set_maximum_size(self._maximum_size) new_parent._key_width = self._key_width new_parent.add_node(self._search_prefix[:len(new_prefix)+1], self) return new_parent.map(store, key, value) children = [node for node, _ in self._iter_nodes(store, key_filter=[key])] if children: child = children[0] else: # new child needed: child = self._new_child(search_key, LeafNode) old_len = len(child) if type(child) is LeafNode: old_size = child._current_size() else: old_size = None prefix, node_details = child.map(store, key, value) if len(node_details) == 1: # child may have shrunk, or might be a new node child = node_details[0][1] self._len = self._len - old_len + len(child) self._items[search_key] = child self._key = None new_node = self if type(child) is LeafNode: if old_size is None: # The old node was an InternalNode which means it has now # collapsed, so we need to check if it will chain to a # collapse at this level. trace.mutter("checking remap as InternalNode -> LeafNode") new_node = self._check_remap(store) else: # If the LeafNode has shrunk in size, we may want to run # a remap check. Checking for a remap is expensive though # and the frequency of a successful remap is very low. 
# Shrinkage by small amounts is common, so we only do the # remap check if the new_size is low or the shrinkage # amount is over a configurable limit. new_size = child._current_size() shrinkage = old_size - new_size if (shrinkage > 0 and new_size < _INTERESTING_NEW_SIZE or shrinkage > _INTERESTING_SHRINKAGE_LIMIT): trace.mutter( "checking remap as size shrunk by %d to be %d", shrinkage, new_size) new_node = self._check_remap(store) if new_node._search_prefix is None: raise AssertionError("_search_prefix should not be None") return new_node._search_prefix, [('', new_node)] # child has overflown - create a new intermediate node. # XXX: This is where we might want to try and expand our depth # to refer to more bytes of every child (which would give us # multiple pointers to child nodes, but less intermediate nodes) child = self._new_child(search_key, InternalNode) child._search_prefix = prefix for split, node in node_details: child.add_node(split, node) self._len = self._len - old_len + len(child) self._key = None return self._search_prefix, [("", self)] def _new_child(self, search_key, klass): """Create a new child node of type klass.""" child = klass() child.set_maximum_size(self._maximum_size) child._key_width = self._key_width child._search_key_func = self._search_key_func self._items[search_key] = child return child def serialise(self, store): """Serialise the node to store. :param store: A VersionedFiles honouring the CHK extensions. :return: An iterable of the keys inserted by this operation. """ for node in self._items.itervalues(): if type(node) is StaticTuple: # Never deserialised. continue if node._key is not None: # Never altered continue for key in node.serialise(store): yield key lines = ["chknode:\n"] lines.append("%d\n" % self._maximum_size) lines.append("%d\n" % self._key_width) lines.append("%d\n" % self._len) if self._search_prefix is None: raise AssertionError("_search_prefix should not be None") lines.append('%s\n' % (self._search_prefix,)) prefix_len = len(self._search_prefix) for prefix, node in sorted(self._items.items()): if type(node) is StaticTuple: key = node[0] else: key = node._key[0] serialised = "%s\x00%s\n" % (prefix, key) if not serialised.startswith(self._search_prefix): raise AssertionError("prefixes mismatch: %s must start with %s" % (serialised, self._search_prefix)) lines.append(serialised[prefix_len:]) sha1, _, _ = store.add_lines((None,), (), lines) self._key = StaticTuple("sha1:" + sha1,).intern() _get_cache()[self._key] = ''.join(lines) yield self._key def _search_key(self, key): """Return the serialised key for key in this node.""" # search keys are fixed width. All will be self._node_width wide, so we # pad as necessary. return (self._search_key_func(key) + '\x00'*self._node_width)[:self._node_width] def _search_prefix_filter(self, key): """Serialise key for use as a prefix filter in iteritems.""" return self._search_key_func(key)[:self._node_width] def _split(self, offset): """Split this node into smaller nodes starting at offset. :param offset: The offset to start the new child nodes at. :return: An iterable of (prefix, node) tuples. prefix is a byte prefix for reaching node. 
""" if offset >= self._node_width: for node in self._items.values(): for result in node._split(offset): yield result return for key, node in self._items.items(): pass def refs(self): """Return the references to other CHK's held by this node.""" if self._key is None: raise AssertionError("unserialised nodes have no refs.") refs = [] for value in self._items.itervalues(): if type(value) is StaticTuple: refs.append(value) else: refs.append(value.key()) return refs def _compute_search_prefix(self, extra_key=None): """Return the unique key prefix for this node. :return: A bytestring of the longest search key prefix that is unique within this node. """ self._search_prefix = self.common_prefix_for_keys(self._items) return self._search_prefix def unmap(self, store, key, check_remap=True): """Remove key from this node and its children.""" if not len(self._items): raise AssertionError("can't unmap in an empty InternalNode.") children = [node for node, _ in self._iter_nodes(store, key_filter=[key])] if children: child = children[0] else: raise KeyError(key) self._len -= 1 unmapped = child.unmap(store, key) self._key = None search_key = self._search_key(key) if len(unmapped) == 0: # All child nodes are gone, remove the child: del self._items[search_key] unmapped = None else: # Stash the returned node self._items[search_key] = unmapped if len(self._items) == 1: # this node is no longer needed: return self._items.values()[0] if type(unmapped) is InternalNode: return self if check_remap: return self._check_remap(store) else: return self def _check_remap(self, store): """Check if all keys contained by children fit in a single LeafNode. :param store: A store to use for reading more nodes :return: Either self, or a new LeafNode which should replace self. """ # Logic for how we determine when we need to rebuild # 1) Implicitly unmap() is removing a key which means that the child # nodes are going to be shrinking by some extent. # 2) If all children are LeafNodes, it is possible that they could be # combined into a single LeafNode, which can then completely replace # this internal node with a single LeafNode # 3) If *one* child is an InternalNode, we assume it has already done # all the work to determine that its children cannot collapse, and # we can then assume that those nodes *plus* the current nodes don't # have a chance of collapsing either. # So a very cheap check is to just say if 'unmapped' is an # InternalNode, we don't have to check further. # TODO: Another alternative is to check the total size of all known # LeafNodes. If there is some formula we can use to determine the # final size without actually having to read in any more # children, it would be nice to have. However, we have to be # careful with stuff like nodes that pull out the common prefix # of each key, as adding a new key can change the common prefix # and cause size changes greater than the length of one key. # So for now, we just add everything to a new Leaf until it # splits, as we know that will give the right answer new_leaf = LeafNode(search_key_func=self._search_key_func) new_leaf.set_maximum_size(self._maximum_size) new_leaf._key_width = self._key_width # A batch_size of 16 was chosen because: # a) In testing, a 4k page held 14 times. So if we have more than 16 # leaf nodes we are unlikely to hold them in a single new leaf # node. 
This still allows for 1 round trip # b) With 16-way fan out, we can still do a single round trip # c) With 255-way fan out, we don't want to read all 255 and destroy # the page cache, just to determine that we really don't need it. for node, _ in self._iter_nodes(store, batch_size=16): if type(node) is InternalNode: # Without looking at any leaf nodes, we are sure return self for key, value in node._items.iteritems(): if new_leaf._map_no_split(key, value): return self trace.mutter("remap generated a new LeafNode") return new_leaf def _deserialise(bytes, key, search_key_func): """Helper for repositorydetails - convert bytes to a node.""" if bytes.startswith("chkleaf:\n"): node = LeafNode.deserialise(bytes, key, search_key_func=search_key_func) elif bytes.startswith("chknode:\n"): node = InternalNode.deserialise(bytes, key, search_key_func=search_key_func) else: raise AssertionError("Unknown node type.") return node class CHKMapDifference(object): """Iterate the stored pages and key,value pairs for (new - old). This class provides a generator over the stored CHK pages and the (key, value) pairs that are in any of the new maps and not in any of the old maps. Note that it may yield chk pages that are common (especially root nodes), but it won't yield (key,value) pairs that are common. """ def __init__(self, store, new_root_keys, old_root_keys, search_key_func, pb=None): # TODO: Should we add a StaticTuple barrier here? It would be nice to # force callers to use StaticTuple, because there will often be # lots of keys passed in here. And even if we cast it locally, # that just meanst that we will have *both* a StaticTuple and a # tuple() in memory, referring to the same object. (so a net # increase in memory, not a decrease.) self._store = store self._new_root_keys = new_root_keys self._old_root_keys = old_root_keys self._pb = pb # All uninteresting chks that we have seen. By the time they are added # here, they should be either fully ignored, or queued up for # processing # TODO: This might grow to a large size if there are lots of merge # parents, etc. However, it probably doesn't scale to O(history) # like _processed_new_refs does. self._all_old_chks = set(self._old_root_keys) # All items that we have seen from the old_root_keys self._all_old_items = set() # These are interesting items which were either read, or already in the # interesting queue (so we don't need to walk them again) # TODO: processed_new_refs becomes O(all_chks), consider switching to # SimpleSet here. self._processed_new_refs = set() self._search_key_func = search_key_func # The uninteresting and interesting nodes to be searched self._old_queue = [] self._new_queue = [] # Holds the (key, value) items found when processing the root nodes, # waiting for the uninteresting nodes to be walked self._new_item_queue = [] self._state = None def _read_nodes_from_store(self, keys): # We chose not to use _get_cache(), because we think in # terms of records to be yielded. Also, we expect to touch each page # only 1 time during this code. (We may want to evaluate saving the # raw bytes into the page cache, which would allow a working tree # update after the fetch to not have to read the bytes again.) 
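# --- Illustrative sketch (not part of bzrlib): a toy reader for the headers
# that LeafNode.serialise()/InternalNode.serialise() write and that
# _deserialise() above dispatches on. It only decodes the fixed header lines;
# the real deserialisers also rebuild the item/child maps. The sample bytes
# are made up.
def example_peek_node_header(bytes):
    if bytes.startswith('chkleaf:\n'):
        kind = 'leaf'
    elif bytes.startswith('chknode:\n'):
        kind = 'internal'
    else:
        raise ValueError('unknown node type: %r' % (bytes[:10],))
    lines = bytes.split('\n')
    # Header layout: type marker, maximum_size, key_width, item count, then
    # the common (serialised or search) prefix, followed by the entries.
    return (kind, int(lines[1]), int(lines[2]), int(lines[3]), lines[4])

sample = 'chkleaf:\n4096\n1\n2\nfile-id-\n'
print example_peek_node_header(sample)
# ('leaf', 4096, 1, 2, 'file-id-')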
as_st = StaticTuple.from_sequence stream = self._store.get_record_stream(keys, 'unordered', True) for record in stream: if self._pb is not None: self._pb.tick() if record.storage_kind == 'absent': raise errors.NoSuchRevision(self._store, record.key) bytes = record.get_bytes_as('fulltext') node = _deserialise(bytes, record.key, search_key_func=self._search_key_func) if type(node) is InternalNode: # Note we don't have to do node.refs() because we know that # there are no children that have been pushed into this node # Note: Using as_st() here seemed to save 1.2MB, which would # indicate that we keep 100k prefix_refs around while # processing. They *should* be shorter lived than that... # It does cost us ~10s of processing time #prefix_refs = [as_st(item) for item in node._items.iteritems()] prefix_refs = node._items.items() items = [] else: prefix_refs = [] # Note: We don't use a StaticTuple here. Profiling showed a # minor memory improvement (0.8MB out of 335MB peak 0.2%) # But a significant slowdown (15s / 145s, or 10%) items = node._items.items() yield record, node, prefix_refs, items def _read_old_roots(self): old_chks_to_enqueue = [] all_old_chks = self._all_old_chks for record, node, prefix_refs, items in \ self._read_nodes_from_store(self._old_root_keys): # Uninteresting node prefix_refs = [p_r for p_r in prefix_refs if p_r[1] not in all_old_chks] new_refs = [p_r[1] for p_r in prefix_refs] all_old_chks.update(new_refs) # TODO: This might be a good time to turn items into StaticTuple # instances and possibly intern them. However, this does not # impact 'initial branch' performance, so I'm not worrying # about this yet self._all_old_items.update(items) # Queue up the uninteresting references # Don't actually put them in the 'to-read' queue until we have # finished checking the interesting references old_chks_to_enqueue.extend(prefix_refs) return old_chks_to_enqueue def _enqueue_old(self, new_prefixes, old_chks_to_enqueue): # At this point, we have read all the uninteresting and interesting # items, so we can queue up the uninteresting stuff, knowing that we've # handled the interesting ones for prefix, ref in old_chks_to_enqueue: not_interesting = True for i in xrange(len(prefix), 0, -1): if prefix[:i] in new_prefixes: not_interesting = False break if not_interesting: # This prefix is not part of the remaining 'interesting set' continue self._old_queue.append(ref) def _read_all_roots(self): """Read the root pages. This is structured as a generator, so that the root records can be yielded up to whoever needs them without any buffering. 
""" # This is the bootstrap phase if not self._old_root_keys: # With no old_root_keys we can just shortcut and be ready # for _flush_new_queue self._new_queue = list(self._new_root_keys) return old_chks_to_enqueue = self._read_old_roots() # filter out any root keys that are already known to be uninteresting new_keys = set(self._new_root_keys).difference(self._all_old_chks) # These are prefixes that are present in new_keys that we are # thinking to yield new_prefixes = set() # We are about to yield all of these, so we don't want them getting # added a second time processed_new_refs = self._processed_new_refs processed_new_refs.update(new_keys) for record, node, prefix_refs, items in \ self._read_nodes_from_store(new_keys): # At this level, we now know all the uninteresting references # So we filter and queue up whatever is remaining prefix_refs = [p_r for p_r in prefix_refs if p_r[1] not in self._all_old_chks and p_r[1] not in processed_new_refs] refs = [p_r[1] for p_r in prefix_refs] new_prefixes.update([p_r[0] for p_r in prefix_refs]) self._new_queue.extend(refs) # TODO: We can potentially get multiple items here, however the # current design allows for this, as callers will do the work # to make the results unique. We might profile whether we # gain anything by ensuring unique return values for items # TODO: This might be a good time to cast to StaticTuple, as # self._new_item_queue will hold the contents of multiple # records for an extended lifetime new_items = [item for item in items if item not in self._all_old_items] self._new_item_queue.extend(new_items) new_prefixes.update([self._search_key_func(item[0]) for item in new_items]) processed_new_refs.update(refs) yield record # For new_prefixes we have the full length prefixes queued up. # However, we also need possible prefixes. (If we have a known ref to # 'ab', then we also need to include 'a'.) So expand the # new_prefixes to include all shorter prefixes for prefix in list(new_prefixes): new_prefixes.update([prefix[:i] for i in xrange(1, len(prefix))]) self._enqueue_old(new_prefixes, old_chks_to_enqueue) def _flush_new_queue(self): # No need to maintain the heap invariant anymore, just pull things out # and process them refs = set(self._new_queue) self._new_queue = [] # First pass, flush all interesting items and convert to using direct refs all_old_chks = self._all_old_chks processed_new_refs = self._processed_new_refs all_old_items = self._all_old_items new_items = [item for item in self._new_item_queue if item not in all_old_items] self._new_item_queue = [] if new_items: yield None, new_items refs = refs.difference(all_old_chks) processed_new_refs.update(refs) while refs: # TODO: Using a SimpleSet for self._processed_new_refs and # saved as much as 10MB of peak memory. However, it requires # implementing a non-pyrex version. next_refs = set() next_refs_update = next_refs.update # Inlining _read_nodes_from_store improves 'bzr branch bzr.dev' # from 1m54s to 1m51s. Consider it. for record, _, p_refs, items in self._read_nodes_from_store(refs): if all_old_items: # using the 'if' check saves about 145s => 141s, when # streaming initial branch of Launchpad data. items = [item for item in items if item not in all_old_items] yield record, items next_refs_update([p_r[1] for p_r in p_refs]) del p_refs # set1.difference(set/dict) walks all of set1, and checks if it # exists in 'other'. # set1.difference(iterable) walks all of iterable, and does a # 'difference_update' on a clone of set1. 
Pick wisely based on the # expected sizes of objects. # in our case it is expected that 'new_refs' will always be quite # small. next_refs = next_refs.difference(all_old_chks) next_refs = next_refs.difference(processed_new_refs) processed_new_refs.update(next_refs) refs = next_refs def _process_next_old(self): # Since we don't filter uninteresting any further than during # _read_all_roots, process the whole queue in a single pass. refs = self._old_queue self._old_queue = [] all_old_chks = self._all_old_chks for record, _, prefix_refs, items in self._read_nodes_from_store(refs): # TODO: Use StaticTuple here? self._all_old_items.update(items) refs = [r for _,r in prefix_refs if r not in all_old_chks] self._old_queue.extend(refs) all_old_chks.update(refs) def _process_queues(self): while self._old_queue: self._process_next_old() return self._flush_new_queue() def process(self): for record in self._read_all_roots(): yield record, [] for record, items in self._process_queues(): yield record, items def iter_interesting_nodes(store, interesting_root_keys, uninteresting_root_keys, pb=None): """Given root keys, find interesting nodes. Evaluate nodes referenced by interesting_root_keys. Ones that are also referenced from uninteresting_root_keys are not considered interesting. :param interesting_root_keys: keys which should be part of the "interesting" nodes (which will be yielded) :param uninteresting_root_keys: keys which should be filtered out of the result set. :return: Yield (interesting record, {interesting key:values}) """ iterator = CHKMapDifference(store, interesting_root_keys, uninteresting_root_keys, search_key_func=store._search_key_func, pb=pb) return iterator.process() try: from bzrlib._chk_map_pyx import ( _bytes_to_text_key, _search_key_16, _search_key_255, _deserialise_leaf_node, _deserialise_internal_node, ) except ImportError, e: osutils.failed_to_load_extension(e) from bzrlib._chk_map_py import ( _bytes_to_text_key, _search_key_16, _search_key_255, _deserialise_leaf_node, _deserialise_internal_node, ) search_key_registry.register('hash-16-way', _search_key_16) search_key_registry.register('hash-255-way', _search_key_255) def _check_key(key): """Helper function to assert that a key is properly formatted. This generally shouldn't be used in production code, but it can be helpful to debug problems. """ if type(key) is not StaticTuple: raise TypeError('key %r is not StaticTuple but %s' % (key, type(key))) if len(key) != 1: raise ValueError('key %r should have length 1, not %d' % (key, len(key),)) if type(key[0]) is not str: raise TypeError('key %r should hold a str, not %r' % (key, type(key[0]))) if not key[0].startswith('sha1:'): raise ValueError('key %r should point to a sha1:' % (key,)) bzr-2.7.0/bzrlib/chk_serializer.py0000644000000000000000000002362611710550135015306 0ustar 00000000000000# Copyright (C) 2008, 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Serializer object for CHK based inventory storage.""" from __future__ import absolute_import from cStringIO import StringIO from bzrlib import lazy_import lazy_import.lazy_import(globals(), """ from bzrlib import ( xml_serializer, ) """) from bzrlib import ( bencode, cache_utf8, errors, revision as _mod_revision, serializer, ) def _validate_properties(props, _decode=cache_utf8._utf8_decode): # TODO: we really want an 'isascii' check for key # Cast the utf8 properties into Unicode 'in place' for key, value in props.iteritems(): props[key] = _decode(value)[0] return props def _is_format_10(value): if value != 10: raise ValueError('Format number was not recognized, expected 10 got %d' % (value,)) return 10 class BEncodeRevisionSerializer1(object): """Simple revision serializer based around bencode. """ squashes_xml_invalid_characters = False # Maps {key:(Revision attribute, bencode_type, validator)} # This tells us what kind we expect bdecode to create, what variable on # Revision we should be using, and a function to call to validate/transform # the type. # TODO: add a 'validate_utf8' for things like revision_id and file_id # and a validator for parent-ids _schema = {'format': (None, int, _is_format_10), 'committer': ('committer', str, cache_utf8.decode), 'timezone': ('timezone', int, None), 'timestamp': ('timestamp', str, float), 'revision-id': ('revision_id', str, None), 'parent-ids': ('parent_ids', list, None), 'inventory-sha1': ('inventory_sha1', str, None), 'message': ('message', str, cache_utf8.decode), 'properties': ('properties', dict, _validate_properties), } def write_revision_to_string(self, rev): encode_utf8 = cache_utf8._utf8_encode # Use a list of tuples rather than a dict # This lets us control the ordering, so that we are able to create # smaller deltas ret = [ ("format", 10), ("committer", encode_utf8(rev.committer)[0]), ] if rev.timezone is not None: ret.append(("timezone", rev.timezone)) # For bzr revisions, the most common property is just 'branch-nick' # which changes infrequently. revprops = {} for key, value in rev.properties.iteritems(): revprops[key] = encode_utf8(value)[0] ret.append(('properties', revprops)) ret.extend([ ("timestamp", "%.3f" % rev.timestamp), ("revision-id", rev.revision_id), ("parent-ids", rev.parent_ids), ("inventory-sha1", rev.inventory_sha1), ("message", encode_utf8(rev.message)[0]), ]) return bencode.bencode(ret) def write_revision(self, rev, f): f.write(self.write_revision_to_string(rev)) def read_revision_from_string(self, text): # TODO: consider writing a Revision decoder, rather than using the # generic bencode decoder # However, to decode all 25k revisions of bzr takes approx 1.3s # If we remove all extra validation that goes down to about 1.2s. # Of that time, probably 0.6s is spend in bencode.bdecode(). # Regardless 'time bzr log' of everything is 7+s, so 1.3s to # extract revision texts isn't a majority of time. ret = bencode.bdecode(text) if not isinstance(ret, list): raise ValueError("invalid revision text") schema = self._schema # timezone is allowed to be missing, but should be set bits = {'timezone': None} for key, value in ret: # Will raise KeyError if not a valid part of the schema, or an # entry is given 2 times. 
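# A minimal standalone sketch (not part of bzrlib) of the schema-driven
# decoding pattern used in the loop below: each bdecoded (key, value) pair
# is looked up in a table giving the target attribute name, the expected
# type, and an optional validator/transformer.  ``decode_pairs`` and
# ``toy_schema`` are invented names.
def decode_pairs(pairs, toy_schema):
    out = {}
    for key, value in pairs:
        # KeyError here means an unknown key, as the comment above notes.
        attr, expected_type, validator = toy_schema[key]
        if value.__class__ is not expected_type:
            raise ValueError('key %s should be %s, got %s'
                             % (key, expected_type, type(value)))
        if validator is not None:
            value = validator(value)
        out[attr] = value
    return out
# decode_pairs([('timestamp', '1300000000.000')],
#              {'timestamp': ('timestamp', str, float)})
# => {'timestamp': 1300000000.0}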
var_name, expected_type, validator = schema[key] if value.__class__ is not expected_type: raise ValueError('key %s did not conform to the expected type' ' %s, but was %s' % (key, expected_type, type(value))) if validator is not None: value = validator(value) bits[var_name] = value if len(bits) != len(schema): missing = [key for key, (var_name, _, _) in schema.iteritems() if var_name not in bits] raise ValueError('Revision text was missing expected keys %s.' ' text %r' % (missing, text)) del bits[None] # Get rid of 'format' since it doesn't get mapped rev = _mod_revision.Revision(**bits) return rev def read_revision(self, f): return self.read_revision_from_string(f.read()) class CHKSerializer(serializer.Serializer): """A CHKInventory based serializer with 'plain' behaviour.""" format_num = '9' revision_format_num = None support_altered_by_hack = False supported_kinds = set(['file', 'directory', 'symlink', 'tree-reference']) def __init__(self, node_size, search_key_name): self.maximum_size = node_size self.search_key_name = search_key_name def _unpack_inventory(self, elt, revision_id=None, entry_cache=None, return_from_cache=False): """Construct from XML Element""" inv = xml_serializer.unpack_inventory_flat(elt, self.format_num, xml_serializer.unpack_inventory_entry, entry_cache, return_from_cache) return inv def read_inventory_from_string(self, xml_string, revision_id=None, entry_cache=None, return_from_cache=False): """Read xml_string into an inventory object. :param xml_string: The xml to read. :param revision_id: If not-None, the expected revision id of the inventory. :param entry_cache: An optional cache of InventoryEntry objects. If supplied we will look up entries via (file_id, revision_id) which should map to a valid InventoryEntry (File/Directory/etc) object. :param return_from_cache: Return entries directly from the cache, rather than copying them first. This is only safe if the caller promises not to mutate the returned inventory entries, but it can make some operations significantly faster. """ try: return self._unpack_inventory( xml_serializer.fromstring(xml_string), revision_id, entry_cache=entry_cache, return_from_cache=return_from_cache) except xml_serializer.ParseError, e: raise errors.UnexpectedInventoryFormat(e) def read_inventory(self, f, revision_id=None): """Read an inventory from a file-like object.""" try: try: return self._unpack_inventory(self._read_element(f), revision_id=None) finally: f.close() except xml_serializer.ParseError, e: raise errors.UnexpectedInventoryFormat(e) def write_inventory_to_lines(self, inv): """Return a list of lines with the encoded inventory.""" return self.write_inventory(inv, None) def write_inventory_to_string(self, inv, working=False): """Just call write_inventory with a StringIO and return the value. :param working: If True skip history data - text_sha1, text_size, reference_revision, symlink_target. """ sio = StringIO() self.write_inventory(inv, sio, working) return sio.getvalue() def write_inventory(self, inv, f, working=False): """Write inventory to a file. :param inv: the inventory to write. :param f: the file to write. (May be None if the lines are the desired output). :param working: If True skip history data - text_sha1, text_size, reference_revision, symlink_target. :return: The inventory as a list of lines. 
""" output = [] append = output.append if inv.revision_id is not None: revid1 = ' revision_id="' revid2 = xml_serializer.encode_and_escape(inv.revision_id) else: revid1 = "" revid2 = "" append('\n' % ( self.format_num, revid1, revid2)) append(' self.chunk_size: raise AssertionError('Somehow we ended up with too much' ' compressed data, %d > %d' % (self.bytes_out_len, self.chunk_size)) nulls_needed = self.chunk_size - self.bytes_out_len if nulls_needed: self.bytes_list.append("\x00" * nulls_needed) return self.bytes_list, self.unused_bytes, nulls_needed def set_optimize(self, for_size=True): """Change how we optimize our writes. :param for_size: If True, optimize for minimum space usage, otherwise optimize for fastest writing speed. :return: None """ if for_size: opts = ChunkWriter._repack_opts_for_size else: opts = ChunkWriter._repack_opts_for_speed self._max_repack, self._max_zsync = opts def _recompress_all_bytes_in(self, extra_bytes=None): """Recompress the current bytes_in, and optionally more. :param extra_bytes: Optional, if supplied we will add it with Z_SYNC_FLUSH :return: (bytes_out, bytes_out_len, alt_compressed) * bytes_out: is the compressed bytes returned from the compressor * bytes_out_len: the length of the compressed output * compressor: An object with everything packed in so far, and Z_SYNC_FLUSH called. """ compressor = zlib.compressobj() bytes_out = [] append = bytes_out.append compress = compressor.compress for accepted_bytes in self.bytes_in: out = compress(accepted_bytes) if out: append(out) if extra_bytes: out = compress(extra_bytes) out += compressor.flush(Z_SYNC_FLUSH) append(out) bytes_out_len = sum(map(len, bytes_out)) return bytes_out, bytes_out_len, compressor def write(self, bytes, reserved=False): """Write some bytes to the chunk. If the bytes fit, False is returned. Otherwise True is returned and the bytes have not been added to the chunk. :param bytes: The bytes to include :param reserved: If True, we can use the space reserved in the constructor. """ if self.num_repack > self._max_repack and not reserved: self.unused_bytes = bytes return True if reserved: capacity = self.chunk_size else: capacity = self.chunk_size - self.reserved_size comp = self.compressor # Check to see if the currently unflushed bytes would fit with a bit of # room to spare, assuming no compression. next_unflushed = self.unflushed_in_bytes + len(bytes) remaining_capacity = capacity - self.bytes_out_len - 10 if (next_unflushed < remaining_capacity): # looks like it will fit out = comp.compress(bytes) if out: self.bytes_list.append(out) self.bytes_out_len += len(out) self.bytes_in.append(bytes) self.unflushed_in_bytes += len(bytes) else: # This may or may not fit, try to add it with Z_SYNC_FLUSH # Note: It is tempting to do this as a look-ahead pass, and to # 'copy()' the compressor before flushing. However, it seems # that Which means that it is the same thing as increasing # repack, similar cost, same benefit. And this way we still # have the 'repack' knob that can be adjusted, and not depend # on a platform-specific 'copy()' function. self.num_zsync += 1 if self._max_repack == 0 and self.num_zsync > self._max_zsync: self.num_repack += 1 self.unused_bytes = bytes return True out = comp.compress(bytes) out += comp.flush(Z_SYNC_FLUSH) self.unflushed_in_bytes = 0 if out: self.bytes_list.append(out) self.bytes_out_len += len(out) # We are a bit extra conservative, because it seems that you *can* # get better compression with Z_SYNC_FLUSH than a full compress. 
It # is probably very rare, but we were able to trigger it. if self.num_repack == 0: safety_margin = 100 else: safety_margin = 10 if self.bytes_out_len + safety_margin <= capacity: # It fit, so mark it added self.bytes_in.append(bytes) else: # We are over budget, try to squeeze this in without any # Z_SYNC_FLUSH calls self.num_repack += 1 (bytes_out, this_len, compressor) = self._recompress_all_bytes_in(bytes) if self.num_repack >= self._max_repack: # When we get *to* _max_repack, bump over so that the # earlier > _max_repack will be triggered. self.num_repack += 1 if this_len + 10 > capacity: (bytes_out, this_len, compressor) = self._recompress_all_bytes_in() self.compressor = compressor # Force us to not allow more data self.num_repack = self._max_repack + 1 self.bytes_list = bytes_out self.bytes_out_len = this_len self.unused_bytes = bytes return True else: # This fits when we pack it tighter, so use the new packing self.compressor = compressor self.bytes_in.append(bytes) self.bytes_list = bytes_out self.bytes_out_len = this_len return False bzr-2.7.0/bzrlib/clean_tree.py0000644000000000000000000001121711673360271014412 0ustar 00000000000000# Copyright (C) 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import import errno import os import shutil from bzrlib import ( controldir, errors, ui, ) from bzrlib.osutils import isdir from bzrlib.trace import note from bzrlib.workingtree import WorkingTree from bzrlib.i18n import gettext def is_detritus(subp): """Return True if the supplied path is detritus, False otherwise""" return subp.endswith('.THIS') or subp.endswith('.BASE') or\ subp.endswith('.OTHER') or subp.endswith('~') or subp.endswith('.tmp') def iter_deletables(tree, unknown=False, ignored=False, detritus=False): """Iterate through files that may be deleted""" for subp in tree.extras(): if detritus and is_detritus(subp): yield tree.abspath(subp), subp continue if tree.is_ignored(subp): if ignored: yield tree.abspath(subp), subp else: if unknown: yield tree.abspath(subp), subp def clean_tree(directory, unknown=False, ignored=False, detritus=False, dry_run=False, no_prompt=False): """Remove files in the specified classes from the tree""" tree = WorkingTree.open_containing(directory)[0] tree.lock_read() try: deletables = list(iter_deletables(tree, unknown=unknown, ignored=ignored, detritus=detritus)) deletables = _filter_out_nested_bzrdirs(deletables) if len(deletables) == 0: note(gettext('Nothing to delete.')) return 0 if not no_prompt: for path, subp in deletables: ui.ui_factory.note(subp) prompt = gettext('Are you sure you wish to delete these') if not ui.ui_factory.get_boolean(prompt): ui.ui_factory.note(gettext('Canceled')) return 0 delete_items(deletables, dry_run=dry_run) finally: tree.unlock() def _filter_out_nested_bzrdirs(deletables): result = [] for path, subp in deletables: 
# bzr won't recurse into unknowns/ignored directories by default # so we don't pay a penalty for checking subdirs of path for nested # bzrdir. # That said we won't detect the branch in the subdir of non-branch # directory and therefore delete it. (worth to FIXME?) if isdir(path): try: controldir.ControlDir.open(path) except errors.NotBranchError: result.append((path,subp)) else: # TODO may be we need to notify user about skipped directories? pass else: result.append((path,subp)) return result def delete_items(deletables, dry_run=False): """Delete files in the deletables iterable""" def onerror(function, path, excinfo): """Show warning for errors seen by rmtree. """ # Handle only permission error while removing files. # Other errors are re-raised. if function is not os.remove or excinfo[1].errno != errno.EACCES: raise ui.ui_factory.show_warning(gettext('unable to remove %s') % path) has_deleted = False for path, subp in deletables: if not has_deleted: note(gettext("deleting paths:")) has_deleted = True if not dry_run: if isdir(path): shutil.rmtree(path, onerror=onerror) else: try: os.unlink(path) note(' ' + subp) except OSError, e: # We handle only permission error here if e.errno != errno.EACCES: raise e ui.ui_factory.show_warning(gettext( 'unable to remove "{0}": {1}.').format( path, e.strerror)) else: note(' ' + subp) if not has_deleted: note(gettext("No files deleted.")) bzr-2.7.0/bzrlib/cleanup.py0000644000000000000000000001457211673635356013760 0ustar 00000000000000# Copyright (C) 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Helpers for managing cleanup functions and the errors they might raise. The usual way to run cleanup code in Python is:: try: do_something() finally: cleanup_something() However if both `do_something` and `cleanup_something` raise an exception Python will forget the original exception and propagate the one from cleanup_something. Unfortunately, this is almost always much less useful than the original exception. If you want to be certain that the first, and only the first, error is raised, then use:: operation = OperationWithCleanups(do_something) operation.add_cleanup(cleanup_something) operation.run_simple() This is more inconvenient (because you need to make every try block a function), but will ensure that the first error encountered is the one raised, while also ensuring all cleanups are run. See OperationWithCleanups for more details. """ from __future__ import absolute_import from collections import deque import sys from bzrlib import ( debug, trace, ) def _log_cleanup_error(exc): trace.mutter('Cleanup failed:') trace.log_exception_quietly() if 'cleanup' in debug.debug_flags: trace.warning('bzr: warning: Cleanup failed: %s', exc) def _run_cleanup(func, *args, **kwargs): """Run func(*args, **kwargs), logging but not propagating any error it raises. 
:returns: True if func raised no errors, else False. """ try: func(*args, **kwargs) except KeyboardInterrupt: raise except Exception, exc: _log_cleanup_error(exc) return False return True def _run_cleanups(funcs): """Run a series of cleanup functions.""" for func, args, kwargs in funcs: _run_cleanup(func, *args, **kwargs) class ObjectWithCleanups(object): """A mixin for objects that hold a cleanup list. Subclass or client code can call add_cleanup and then later `cleanup_now`. """ def __init__(self): self.cleanups = deque() def add_cleanup(self, cleanup_func, *args, **kwargs): """Add a cleanup to run. Cleanups may be added at any time. Cleanups will be executed in LIFO order. """ self.cleanups.appendleft((cleanup_func, args, kwargs)) def cleanup_now(self): _run_cleanups(self.cleanups) self.cleanups.clear() class OperationWithCleanups(ObjectWithCleanups): """A way to run some code with a dynamic cleanup list. This provides a way to add cleanups while the function-with-cleanups is running. Typical use:: operation = OperationWithCleanups(some_func) operation.run(args...) where `some_func` is:: def some_func(operation, args, ...): do_something() operation.add_cleanup(something) # etc Note that the first argument passed to `some_func` will be the OperationWithCleanups object. To invoke `some_func` without that, use `run_simple` instead of `run`. """ def __init__(self, func): super(OperationWithCleanups, self).__init__() self.func = func def run(self, *args, **kwargs): return _do_with_cleanups( self.cleanups, self.func, self, *args, **kwargs) def run_simple(self, *args, **kwargs): return _do_with_cleanups( self.cleanups, self.func, *args, **kwargs) def _do_with_cleanups(cleanup_funcs, func, *args, **kwargs): """Run `func`, then call all the cleanup_funcs. All the cleanup_funcs are guaranteed to be run. The first exception raised by func or any of the cleanup_funcs is the one that will be propagted by this function (subsequent errors are caught and logged). Conceptually similar to:: try: return func(*args, **kwargs) finally: for cleanup, cargs, ckwargs in cleanup_funcs: cleanup(*cargs, **ckwargs) It avoids several problems with using try/finally directly: * an exception from func will not be obscured by a subsequent exception from a cleanup. * an exception from a cleanup will not prevent other cleanups from running (but the first exception encountered is still the one propagated). Unike `_run_cleanup`, `_do_with_cleanups` can propagate an exception from a cleanup, but only if there is no exception from func. """ # As correct as Python 2.4 allows. try: result = func(*args, **kwargs) except: # We have an exception from func already, so suppress cleanup errors. _run_cleanups(cleanup_funcs) raise else: # No exception from func, so allow the first exception from # cleanup_funcs to propagate if one occurs (but only after running all # of them). exc_info = None for cleanup, c_args, c_kwargs in cleanup_funcs: # XXX: Hmm, if KeyboardInterrupt arrives at exactly this line, we # won't run all cleanups... perhaps we should temporarily install a # SIGINT handler? if exc_info is None: try: cleanup(*c_args, **c_kwargs) except: # This is the first cleanup to fail, so remember its # details. exc_info = sys.exc_info() else: # We already have an exception to propagate, so log any errors # but don't propagate them. 
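# A minimal standalone demonstration (not part of bzrlib) of the guarantee
# documented above: with a bare try/finally the exception raised by a
# failing cleanup replaces the original error, while _do_with_cleanups
# keeps the first error and only logs the later one.  Both failing
# functions are toy examples.
def _demo_first_error_wins():
    def work():
        raise RuntimeError('original error')
    def bad_cleanup():
        raise ValueError('cleanup error')
    try:
        try:
            work()
        finally:
            bad_cleanup()
    except Exception, e:
        hidden = e   # ValueError: the real failure has been lost
    try:
        _do_with_cleanups([(bad_cleanup, (), {})], work)
    except Exception, e:
        kept = e     # RuntimeError: the original failure is preserved
    return hidden, kept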
_run_cleanup(cleanup, *c_args, **c_kwargs) if exc_info is not None: try: raise exc_info[0], exc_info[1], exc_info[2] finally: del exc_info # No error, so we can return the result return result bzr-2.7.0/bzrlib/cmd_test_script.py0000644000000000000000000000407011673635356015507 0ustar 00000000000000# Copyright (C) 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Front-end command for shell-like test scripts. See doc/developers/testing.txt for more explanations. This module should be importable even if testtools aren't available. """ from __future__ import absolute_import import os from bzrlib import ( commands, option, ) class cmd_test_script(commands.Command): """Run a shell-like test from a file.""" hidden = True takes_args = ['infile'] takes_options = [ option.Option('null-output', help='Null command outputs match any output.'), ] @commands.display_command def run(self, infile, null_output=False): # local imports to defer testtools dependency from bzrlib import tests from bzrlib.tests.script import TestCaseWithTransportAndScript f = open(infile) try: script = f.read() finally: f.close() class Test(TestCaseWithTransportAndScript): script = None # Set before running def test_it(self): self.run_script(script, null_output_matches_anything=null_output) runner = tests.TextTestRunner(stream=self.outf) test = Test('test_it') test.path = os.path.realpath(infile) res = runner.run(test) return len(res.errors) + len(res.failures) bzr-2.7.0/bzrlib/cmd_version_info.py0000644000000000000000000001164011677332046015637 0ustar 00000000000000# Copyright (C) 2005-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Commands for generating snapshot information about a bzr tree.""" from __future__ import absolute_import from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ from bzrlib import ( branch, errors, version_info_formats, workingtree, ) from bzrlib.i18n import gettext """) from bzrlib.commands import Command from bzrlib.option import Option, RegistryOption def _parse_version_info_format(format): """Convert a string passed by the user into a VersionInfoFormat.
This looks in the version info format registry, and if the format cannot be found, generates a useful error exception. """ try: return version_info_formats.get_builder(format) except KeyError: formats = version_info_formats.get_builder_formats() raise errors.BzrCommandError(gettext('No known version info format {0}.' ' Supported types are: {1}').format( format, formats)) class cmd_version_info(Command): __doc__ = """Show version information about this tree. You can use this command to add information about version into source code of an application. The output can be in one of the supported formats or in a custom format based on a template. For example:: bzr version-info --custom \\ --template="#define VERSION_INFO \\"Project 1.2.3 (r{revno})\\"\\n" will produce a C header file with formatted string containing the current revision number. Other supported variables in templates are: * {date} - date of the last revision * {build_date} - current date * {revno} - revision number * {revision_id} - revision id * {branch_nick} - branch nickname * {clean} - 0 if the source tree contains uncommitted changes, otherwise 1 """ takes_options = [RegistryOption('format', 'Select the output format.', value_switches=True, lazy_registry=('bzrlib.version_info_formats', 'format_registry')), Option('all', help='Include all possible information.'), Option('check-clean', help='Check if tree is clean.'), Option('include-history', help='Include the revision-history.'), Option('include-file-revisions', help='Include the last revision for each file.'), Option('template', type=str, help='Template for the output.'), 'revision', ] takes_args = ['location?'] encoding_type = 'exact' def run(self, location=None, format=None, all=False, check_clean=False, include_history=False, include_file_revisions=False, template=None, revision=None): if revision and len(revision) > 1: raise errors.BzrCommandError( gettext('bzr version-info --revision takes exactly' ' one revision specifier')) if location is None: location = '.' if format is None: format = version_info_formats.format_registry.get() try: wt = workingtree.WorkingTree.open_containing(location)[0] except errors.NoWorkingTree: b = branch.Branch.open(location) wt = None else: b = wt.branch if all: include_history = True check_clean = True include_file_revisions = True if template: include_history = True include_file_revisions = True if '{clean}' in template: check_clean = True if revision is not None: revision_id = revision[0].as_revision_id(b) else: revision_id = None builder = format(b, working_tree=wt, check_for_clean=check_clean, include_revision_history=include_history, include_file_revisions=include_file_revisions, template=template, revision_id=revision_id) builder.generate(self.outf) bzr-2.7.0/bzrlib/cmdline.py0000644000000000000000000001233212150631743013717 0ustar 00000000000000# Copyright (C) 2010-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Unicode-compatible command-line splitter for all platforms. The user-visible behaviour of this module is described in configuring_bazaar.txt. """ from __future__ import absolute_import import re _whitespace_match = re.compile(u'\s', re.UNICODE).match class _PushbackSequence(object): def __init__(self, orig): self._iter = iter(orig) self._pushback_buffer = [] def next(self): if len(self._pushback_buffer) > 0: return self._pushback_buffer.pop() else: return self._iter.next() def pushback(self, char): self._pushback_buffer.append(char) def __iter__(self): return self class _Whitespace(object): def process(self, next_char, context): if _whitespace_match(next_char): if len(context.token) > 0: return None else: return self elif next_char in context.allowed_quote_chars: context.quoted = True return _Quotes(next_char, self) elif next_char == u'\\': return _Backslash(self) else: context.token.append(next_char) return _Word() class _Quotes(object): def __init__(self, quote_char, exit_state): self.quote_char = quote_char self.exit_state = exit_state def process(self, next_char, context): if next_char == u'\\': return _Backslash(self) elif next_char == self.quote_char: context.token.append(u'') return self.exit_state else: context.token.append(next_char) return self class _Backslash(object): # See http://msdn.microsoft.com/en-us/library/bb776391(VS.85).aspx def __init__(self, exit_state): self.exit_state = exit_state self.count = 1 def process(self, next_char, context): if next_char == u'\\': self.count += 1 return self elif next_char in context.allowed_quote_chars: # 2N backslashes followed by a quote are N backslashes context.token.append(u'\\' * (self.count/2)) # 2N+1 backslashes follwed by a quote are N backslashes followed by # the quote which should not be processed as the start or end of # the quoted arg if self.count % 2 == 1: # odd number of \ escapes the quote context.token.append(next_char) else: # let exit_state handle next_char context.seq.pushback(next_char) self.count = 0 return self.exit_state else: # N backslashes not followed by a quote are just N backslashes if self.count > 0: context.token.append(u'\\' * self.count) self.count = 0 # let exit_state handle next_char context.seq.pushback(next_char) return self.exit_state def finish(self, context): if self.count > 0: context.token.append(u'\\' * self.count) class _Word(object): def process(self, next_char, context): if _whitespace_match(next_char): return None elif next_char in context.allowed_quote_chars: return _Quotes(next_char, self) elif next_char == u'\\': return _Backslash(self) else: context.token.append(next_char) return self class Splitter(object): def __init__(self, command_line, single_quotes_allowed): self.seq = _PushbackSequence(command_line) self.allowed_quote_chars = u'"' if single_quotes_allowed: self.allowed_quote_chars += u"'" def __iter__(self): return self def next(self): quoted, token = self._get_token() if token is None: raise StopIteration return quoted, token def _get_token(self): self.quoted = False self.token = [] state = _Whitespace() for next_char in self.seq: state = state.process(next_char, self) if state is None: break if not state is None and not getattr(state, 'finish', None) is None: state.finish(self) result = u''.join(self.token) if not self.quoted and result == '': result = None return 
self.quoted, result def split(unsplit, single_quotes_allowed=True): splitter = Splitter(unsplit, single_quotes_allowed=single_quotes_allowed) return [arg for quoted, arg in splitter] bzr-2.7.0/bzrlib/commands.py0000644000000000000000000013212212017227322014101 0ustar 00000000000000# Copyright (C) 2005-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import # TODO: Define arguments by objects, rather than just using names. # Those objects can specify the expected type of the argument, which # would help with validation and shell completion. They could also provide # help/explanation for that argument in a structured way. # TODO: Specific "examples" property on commands for consistent formatting. import os import sys from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ import errno import threading import bzrlib from bzrlib import ( config, cleanup, cmdline, debug, errors, i18n, option, osutils, trace, ui, ) """) from bzrlib.hooks import Hooks from bzrlib.i18n import gettext # Compatibility - Option used to be in commands. from bzrlib.option import Option from bzrlib.plugin import disable_plugins, load_plugins from bzrlib import registry class CommandInfo(object): """Information about a command.""" def __init__(self, aliases): """The list of aliases for the command.""" self.aliases = aliases @classmethod def from_command(klass, command): """Factory to construct a CommandInfo from a command.""" return klass(command.aliases) class CommandRegistry(registry.Registry): """Special registry mapping command names to command classes. :ivar overridden_registry: Look in this registry for commands being overridden by this registry. This can be used to tell plugin commands about the builtin they're decorating. """ def __init__(self): registry.Registry.__init__(self) self.overridden_registry = None # map from aliases to the real command that implements the name self._alias_dict = {} def get(self, command_name): real_name = self._alias_dict.get(command_name, command_name) return registry.Registry.get(self, real_name) @staticmethod def _get_name(command_name): if command_name.startswith("cmd_"): return _unsquish_command_name(command_name) else: return command_name def register(self, cmd, decorate=False): """Utility function to help register a command :param cmd: Command subclass to register :param decorate: If true, allow overriding an existing command of the same name; the old command is returned by this function. Otherwise it is an error to try to override an existing command. 
""" k = cmd.__name__ k_unsquished = self._get_name(k) try: previous = self.get(k_unsquished) except KeyError: previous = None if self.overridden_registry: try: previous = self.overridden_registry.get(k_unsquished) except KeyError: pass info = CommandInfo.from_command(cmd) try: registry.Registry.register(self, k_unsquished, cmd, override_existing=decorate, info=info) except KeyError: trace.warning('Two plugins defined the same command: %r' % k) trace.warning('Not loading the one in %r' % sys.modules[cmd.__module__]) trace.warning('Previously this command was registered from %r' % sys.modules[previous.__module__]) for a in cmd.aliases: self._alias_dict[a] = k_unsquished return previous def register_lazy(self, command_name, aliases, module_name): """Register a command without loading its module. :param command_name: The primary name of the command. :param aliases: A list of aliases for the command. :module_name: The module that the command lives in. """ key = self._get_name(command_name) registry.Registry.register_lazy(self, key, module_name, command_name, info=CommandInfo(aliases)) for a in aliases: self._alias_dict[a] = key plugin_cmds = CommandRegistry() builtin_command_registry = CommandRegistry() plugin_cmds.overridden_registry = builtin_command_registry def register_command(cmd, decorate=False): """Register a plugin command. Should generally be avoided in favor of lazy registration. """ global plugin_cmds return plugin_cmds.register(cmd, decorate) def _squish_command_name(cmd): return 'cmd_' + cmd.replace('-', '_') def _unsquish_command_name(cmd): return cmd[4:].replace('_','-') def _register_builtin_commands(): if builtin_command_registry.keys(): # only load once return import bzrlib.builtins for cmd_class in _scan_module_for_commands(bzrlib.builtins).values(): builtin_command_registry.register(cmd_class) bzrlib.builtins._register_lazy_builtins() def _scan_module_for_commands(module): r = {} for name, obj in module.__dict__.iteritems(): if name.startswith("cmd_"): real_name = _unsquish_command_name(name) r[real_name] = obj return r def _list_bzr_commands(names): """Find commands from bzr's core and plugins. This is not the public interface, just the default hook called by all_command_names. """ # to eliminate duplicates names.update(builtin_command_names()) names.update(plugin_command_names()) return names def all_command_names(): """Return a set of all command names.""" names = set() for hook in Command.hooks['list_commands']: names = hook(names) if names is None: raise AssertionError( 'hook %s returned None' % Command.hooks.get_hook_name(hook)) return names def builtin_command_names(): """Return list of builtin command names. Use of all_command_names() is encouraged rather than builtin_command_names and/or plugin_command_names. """ _register_builtin_commands() return builtin_command_registry.keys() def plugin_command_names(): """Returns command names from commands registered by plugins.""" return plugin_cmds.keys() def get_cmd_object(cmd_name, plugins_override=True): """Return the command object for a command. plugins_override If true, plugin commands can override builtins. """ try: return _get_cmd_object(cmd_name, plugins_override) except KeyError: raise errors.BzrCommandError(gettext('unknown command "%s"') % cmd_name) def _get_cmd_object(cmd_name, plugins_override=True, check_missing=True): """Get a command object. :param cmd_name: The name of the command. :param plugins_override: Allow plugins to override builtins. 
:param check_missing: Look up commands not found in the regular index via the get_missing_command hook. :return: A Command object instance :raises KeyError: If no command is found. """ # We want only 'ascii' command names, but the user may have typed # in a Unicode name. In that case, they should just get a # 'command not found' error later. # In the future, we may actually support Unicode command names. cmd = None # Get a command for hook in Command.hooks['get_command']: cmd = hook(cmd, cmd_name) if cmd is not None and not plugins_override and not cmd.plugin_name(): # We've found a non-plugin command, don't permit it to be # overridden. break if cmd is None and check_missing: for hook in Command.hooks['get_missing_command']: cmd = hook(cmd_name) if cmd is not None: break if cmd is None: # No command found. raise KeyError # Allow plugins to extend commands for hook in Command.hooks['extend_command']: hook(cmd) if getattr(cmd, 'invoked_as', None) is None: cmd.invoked_as = cmd_name return cmd def _try_plugin_provider(cmd_name): """Probe for a plugin provider having cmd_name.""" try: plugin_metadata, provider = probe_for_provider(cmd_name) raise errors.CommandAvailableInPlugin(cmd_name, plugin_metadata, provider) except errors.NoPluginAvailable: pass def probe_for_provider(cmd_name): """Look for a provider for cmd_name. :param cmd_name: The command name. :return: plugin_metadata, provider for getting cmd_name. :raises NoPluginAvailable: When no provider can supply the plugin. """ # look for providers that provide this command but aren't installed for provider in command_providers_registry: try: return provider.plugin_for_command(cmd_name), provider except errors.NoPluginAvailable: pass raise errors.NoPluginAvailable(cmd_name) def _get_bzr_command(cmd_or_None, cmd_name): """Get a command from bzr's core.""" try: cmd_class = builtin_command_registry.get(cmd_name) except KeyError: pass else: return cmd_class() return cmd_or_None def _get_external_command(cmd_or_None, cmd_name): """Lookup a command that is a shell script.""" # Only do external command lookups when no command is found so far. if cmd_or_None is not None: return cmd_or_None from bzrlib.externalcommand import ExternalCommand cmd_obj = ExternalCommand.find_command(cmd_name) if cmd_obj: return cmd_obj def _get_plugin_command(cmd_or_None, cmd_name): """Get a command from bzr's plugins.""" try: return plugin_cmds.get(cmd_name)() except KeyError: pass for key in plugin_cmds.keys(): info = plugin_cmds.get_info(key) if cmd_name in info.aliases: return plugin_cmds.get(key)() return cmd_or_None class Command(object): """Base class for commands. Commands are the heart of the command-line bzr interface. The command object mostly handles the mapping of command-line parameters into one or more bzrlib operations, and of the results into textual output. Commands normally don't have any state. All their arguments are passed in to the run method. (Subclasses may take a different policy if the behaviour of the instance needs to depend on e.g. a shell plugin and not just its Python class.) The docstring for an actual command should give a single-line summary, then a complete description of the command. A grammar description will be inserted. :cvar aliases: Other accepted names for this command. :cvar takes_args: List of argument forms, marked with whether they are optional, repeated, etc. 
Examples:: ['to_location', 'from_branch?', 'file*'] * 'to_location' is required * 'from_branch' is optional * 'file' can be specified 0 or more times :cvar takes_options: List of options that may be given for this command. These can be either strings, referring to globally-defined options, or option objects. Retrieve through options(). :cvar hidden: If true, this command isn't advertised. This is typically for commands intended for expert users. :cvar encoding_type: Command objects will get a 'outf' attribute, which has been setup to properly handle encoding of unicode strings. encoding_type determines what will happen when characters cannot be encoded: * strict - abort if we cannot decode * replace - put in a bogus character (typically '?') * exact - do not encode sys.stdout NOTE: by default on Windows, sys.stdout is opened as a text stream, therefore LF line-endings are converted to CRLF. When a command uses encoding_type = 'exact', then sys.stdout is forced to be a binary stream, and line-endings will not mangled. :cvar invoked_as: A string indicating the real name under which this command was invoked, before expansion of aliases. (This may be None if the command was constructed and run in-process.) :cvar hooks: An instance of CommandHooks. :cvar __doc__: The help shown by 'bzr help command' for this command. This is set by assigning explicitly to __doc__ so that -OO can be used:: class Foo(Command): __doc__ = "My help goes here" """ aliases = [] takes_args = [] takes_options = [] encoding_type = 'strict' invoked_as = None l10n = True hidden = False def __init__(self): """Construct an instance of this command.""" # List of standard options directly supported self.supported_std_options = [] self._setup_run() def add_cleanup(self, cleanup_func, *args, **kwargs): """Register a function to call after self.run returns or raises. Functions will be called in LIFO order. """ self._operation.add_cleanup(cleanup_func, *args, **kwargs) def cleanup_now(self): """Execute and empty pending cleanup functions immediately. After cleanup_now all registered cleanups are forgotten. add_cleanup may be called again after cleanup_now; these cleanups will be called after self.run returns or raises (or when cleanup_now is next called). This is useful for releasing expensive or contentious resources (such as write locks) before doing further work that does not require those resources (such as writing results to self.outf). Note though, that as it releases all resources, this may release locks that the command wants to hold, so use should be done with care. """ self._operation.cleanup_now() def _usage(self): """Return single-line grammar for this command. Only describes arguments, not options. """ s = 'bzr ' + self.name() + ' ' for aname in self.takes_args: aname = aname.upper() if aname[-1] in ['$', '+']: aname = aname[:-1] + '...' elif aname[-1] == '?': aname = '[' + aname[:-1] + ']' elif aname[-1] == '*': aname = '[' + aname[:-1] + '...]' s += aname + ' ' s = s[:-1] # remove last space return s def get_help_text(self, additional_see_also=None, plain=True, see_also_as_links=False, verbose=True): """Return a text string with help for this command. :param additional_see_also: Additional help topics to be cross-referenced. :param plain: if False, raw help (reStructuredText) is returned instead of plain text. 
:param see_also_as_links: if True, convert items in 'See also' list to internal links (used by bzr_man rstx generator) :param verbose: if True, display the full help, otherwise leave out the descriptive sections and just display usage help (e.g. Purpose, Usage, Options) with a message explaining how to obtain full help. """ if self.l10n: i18n.install() # Install i18n only for get_help_text for now. doc = self.help() if doc: # Note: If self.gettext() translates ':Usage:\n', the section will # be shown after "Description" section and we don't want to # translate the usage string. # Though, bzr export-pot don't exports :Usage: section and it must # not be translated. doc = self.gettext(doc) else: doc = gettext("No help for this command.") # Extract the summary (purpose) and sections out from the text purpose,sections,order = self._get_help_parts(doc) # If a custom usage section was provided, use it if sections.has_key('Usage'): usage = sections.pop('Usage') else: usage = self._usage() # The header is the purpose and usage result = "" result += gettext(':Purpose: %s\n') % (purpose,) if usage.find('\n') >= 0: result += gettext(':Usage:\n%s\n') % (usage,) else: result += gettext(':Usage: %s\n') % (usage,) result += '\n' # Add the options # # XXX: optparse implicitly rewraps the help, and not always perfectly, # so we get . -- mbp # 20090319 parser = option.get_optparser(self.options()) options = parser.format_option_help() # FIXME: According to the spec, ReST option lists actually don't # support options like --1.14 so that causes syntax errors (in Sphinx # at least). As that pattern always appears in the commands that # break, we trap on that and then format that block of 'format' options # as a literal block. We use the most recent format still listed so we # don't have to do that too often -- vila 20110514 if not plain and options.find(' --1.14 ') != -1: options = options.replace(' format:\n', ' format::\n\n', 1) if options.startswith('Options:'): result += gettext(':Options:%s') % (options[len('options:'):],) else: result += options result += '\n' if verbose: # Add the description, indenting it 2 spaces # to match the indentation of the options if sections.has_key(None): text = sections.pop(None) text = '\n '.join(text.splitlines()) result += gettext(':Description:\n %s\n\n') % (text,) # Add the custom sections (e.g. Examples). Note that there's no need # to indent these as they must be indented already in the source. 
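# A small worked example (invented, not bzrlib code) of how a command
# docstring is carved up by _get_help_parts() defined just below: the first
# line becomes the purpose, ":Name:" heading lines open named sections, and
# any other text falls into the default (None) section.
_example_doc = (
    "Do something useful.\n"
    "\n"
    "A longer description of the command.\n"
    "\n"
    ":Examples:\n"
    "    bzr something --option\n"
)
# Command._get_help_parts(_example_doc) returns roughly:
#   summary  == 'Do something useful.'
#   sections == {None: 'A longer description of the command.\n',
#                'Examples': '    bzr something --option'}
#   order    == [None, 'Examples']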
if sections: for label in order: if label in sections: result += ':%s:\n%s\n' % (label, sections[label]) result += '\n' else: result += (gettext("See bzr help %s for more details and examples.\n\n") % self.name()) # Add the aliases, source (plug-in) and see also links, if any if self.aliases: result += gettext(':Aliases: ') result += ', '.join(self.aliases) + '\n' plugin_name = self.plugin_name() if plugin_name is not None: result += gettext(':From: plugin "%s"\n') % plugin_name see_also = self.get_see_also(additional_see_also) if see_also: if not plain and see_also_as_links: see_also_links = [] for item in see_also: if item == 'topics': # topics doesn't have an independent section # so don't create a real link see_also_links.append(item) else: # Use a Sphinx link for this entry link_text = gettext(":doc:`{0} <{1}-help>`").format( item, item) see_also_links.append(link_text) see_also = see_also_links result += gettext(':See also: %s') % ', '.join(see_also) + '\n' # If this will be rendered as plain text, convert it if plain: import bzrlib.help_topics result = bzrlib.help_topics.help_as_plain_text(result) return result @staticmethod def _get_help_parts(text): """Split help text into a summary and named sections. :return: (summary,sections,order) where summary is the top line and sections is a dictionary of the rest indexed by section name. order is the order the section appear in the text. A section starts with a heading line of the form ":xxx:". Indented text on following lines is the section value. All text found outside a named section is assigned to the default section which is given the key of None. """ def save_section(sections, order, label, section): if len(section) > 0: if sections.has_key(label): sections[label] += '\n' + section else: order.append(label) sections[label] = section lines = text.rstrip().splitlines() summary = lines.pop(0) sections = {} order = [] label,section = None,'' for line in lines: if line.startswith(':') and line.endswith(':') and len(line) > 2: save_section(sections, order, label, section) label,section = line[1:-1],'' elif (label is not None) and len(line) > 1 and not line[0].isspace(): save_section(sections, order, label, section) label,section = None,line else: if len(section) > 0: section += '\n' + line else: section = line save_section(sections, order, label, section) return summary, sections, order def get_help_topic(self): """Return the commands help topic - its name.""" return self.name() def get_see_also(self, additional_terms=None): """Return a list of help topics that are related to this command. The list is derived from the content of the _see_also attribute. Any duplicates are removed and the result is in lexical order. :param additional_terms: Additional help topics to cross-reference. :return: A list of help topics. """ see_also = set(getattr(self, '_see_also', [])) if additional_terms: see_also.update(additional_terms) return sorted(see_also) def options(self): """Return dict of valid options for this command. 
Maps from long option name to option object.""" r = Option.STD_OPTIONS.copy() std_names = r.keys() for o in self.takes_options: if isinstance(o, basestring): o = option.Option.OPTIONS[o] r[o.name] = o if o.name in std_names: self.supported_std_options.append(o.name) return r def _setup_outf(self): """Return a file linked to stdout, which has proper encoding.""" self.outf = ui.ui_factory.make_output_stream( encoding_type=self.encoding_type) def run_argv_aliases(self, argv, alias_argv=None): """Parse the command line and run with extra aliases in alias_argv.""" args, opts = parse_args(self, argv, alias_argv) self._setup_outf() # Process the standard options if 'help' in opts: # e.g. bzr add --help self.outf.write(self.get_help_text()) return 0 if 'usage' in opts: # e.g. bzr add --usage self.outf.write(self.get_help_text(verbose=False)) return 0 trace.set_verbosity_level(option._verbosity_level) if 'verbose' in self.supported_std_options: opts['verbose'] = trace.is_verbose() elif opts.has_key('verbose'): del opts['verbose'] if 'quiet' in self.supported_std_options: opts['quiet'] = trace.is_quiet() elif opts.has_key('quiet'): del opts['quiet'] # mix arguments and options into one dictionary cmdargs = _match_argform(self.name(), self.takes_args, args) cmdopts = {} for k, v in opts.items(): cmdopts[k.replace('-', '_')] = v all_cmd_args = cmdargs.copy() all_cmd_args.update(cmdopts) try: return self.run(**all_cmd_args) finally: # reset it, so that other commands run in the same process won't # inherit state. Before we reset it, log any activity, so that it # gets properly tracked. ui.ui_factory.log_transport_activity( display=('bytes' in debug.debug_flags)) trace.set_verbosity_level(0) def _setup_run(self): """Wrap the defined run method on self with a cleanup. This is called by __init__ to make the Command be able to be run by just calling run(), as it could be before cleanups were added. If a different form of cleanups are in use by your Command subclass, you can override this method. """ class_run = self.run def run(*args, **kwargs): for hook in Command.hooks['pre_command']: hook(self) self._operation = cleanup.OperationWithCleanups(class_run) try: return self._operation.run_simple(*args, **kwargs) finally: del self._operation for hook in Command.hooks['post_command']: hook(self) self.run = run def run(self): """Actually run the command. This is invoked with the options and arguments bound to keyword parameters. Return 0 or None if the command was successful, or a non-zero shell error code if not. It's OK for this method to allow an exception to raise up. This method is automatically wrapped by Command.__init__ with a cleanup operation, stored as self._operation. This can be used via self.add_cleanup to perform automatic cleanups at the end of run(). The argument for run are assembled by introspection. So for instance, if your command takes an argument files, you would declare:: def run(self, files=None): pass """ raise NotImplementedError('no implementation of command %r' % self.name()) def help(self): """Return help message for this class.""" from inspect import getdoc if self.__doc__ is Command.__doc__: return None return getdoc(self) def gettext(self, message): """Returns the gettext function used to translate this command's help. Commands provided by plugins should override this to use their own i18n system. """ return i18n.gettext_per_paragraph(message) def name(self): """Return the canonical name for this command. The name under which it was actually invoked is available in invoked_as. 
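# Illustrative sketch (hypothetical command, not shipped with bzrlib): as the
# run() docstring above explains, entries in takes_args are bound to keyword
# parameters of run() by introspection, and option values arrive as keyword
# arguments with '-' replaced by '_' (see parse_args and _match_argform later
# in this module).
from bzrlib.commands import Command
from bzrlib.option import Option

class cmd_frob(Command):
    """Frobnicate the named files."""

    takes_args = ['file*']
    takes_options = ['verbose',
                     Option('dry-run', help='Do not change anything.')]

    def run(self, file_list=None, verbose=False, dry_run=False):
        # 'file*' arrives as file_list; '--dry-run' arrives as dry_run.
        for path in file_list or []:
            self.outf.write('would frobnicate %s\n' % path)
        return 0

# A plugin would typically expose this with
# bzrlib.commands.register_command(cmd_frob).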
""" return _unsquish_command_name(self.__class__.__name__) def plugin_name(self): """Get the name of the plugin that provides this command. :return: The name of the plugin or None if the command is builtin. """ mod_parts = self.__module__.split('.') if len(mod_parts) >= 3 and mod_parts[1] == 'plugins': return mod_parts[2] else: return None class CommandHooks(Hooks): """Hooks related to Command object creation/enumeration.""" def __init__(self): """Create the default hooks. These are all empty initially, because by default nothing should get notified. """ Hooks.__init__(self, "bzrlib.commands", "Command.hooks") self.add_hook('extend_command', "Called after creating a command object to allow modifications " "such as adding or removing options, docs etc. Called with the " "new bzrlib.commands.Command object.", (1, 13)) self.add_hook('get_command', "Called when creating a single command. Called with " "(cmd_or_None, command_name). get_command should either return " "the cmd_or_None parameter, or a replacement Command object that " "should be used for the command. Note that the Command.hooks " "hooks are core infrastructure. Many users will prefer to use " "bzrlib.commands.register_command or plugin_cmds.register_lazy.", (1, 17)) self.add_hook('get_missing_command', "Called when creating a single command if no command could be " "found. Called with (command_name). get_missing_command should " "either return None, or a Command object to be used for the " "command.", (1, 17)) self.add_hook('list_commands', "Called when enumerating commands. Called with a set of " "cmd_name strings for all the commands found so far. This set " " is safe to mutate - e.g. to remove a command. " "list_commands should return the updated set of command names.", (1, 17)) self.add_hook('pre_command', "Called prior to executing a command. Called with the command " "object.", (2, 6)) self.add_hook('post_command', "Called after executing a command. Called with the command " "object.", (2, 6)) Command.hooks = CommandHooks() def parse_args(command, argv, alias_argv=None): """Parse command line. Arguments and options are parsed at this level before being passed down to specific command handlers. This routine knows, from a lookup table, something about the available options, what optargs they take, and which commands will accept them. """ # TODO: make it a method of the Command? parser = option.get_optparser(command.options()) if alias_argv is not None: args = alias_argv + argv else: args = argv # for python 2.5 and later, optparse raises this exception if a non-ascii # option name is given. 
See http://bugs.python.org/issue2931 try: options, args = parser.parse_args(args) except UnicodeEncodeError,e: raise errors.BzrCommandError( gettext('Only ASCII permitted in option names')) opts = dict([(k, v) for k, v in options.__dict__.iteritems() if v is not option.OptionParser.DEFAULT_VALUE]) return args, opts def _match_argform(cmd, takes_args, args): argdict = {} # step through args and takes_args, allowing appropriate 0-many matches for ap in takes_args: argname = ap[:-1] if ap[-1] == '?': if args: argdict[argname] = args.pop(0) elif ap[-1] == '*': # all remaining arguments if args: argdict[argname + '_list'] = args[:] args = [] else: argdict[argname + '_list'] = None elif ap[-1] == '+': if not args: raise errors.BzrCommandError(gettext( "command {0!r} needs one or more {1}").format( cmd, argname.upper())) else: argdict[argname + '_list'] = args[:] args = [] elif ap[-1] == '$': # all but one if len(args) < 2: raise errors.BzrCommandError( gettext("command {0!r} needs one or more {1}").format( cmd, argname.upper())) argdict[argname + '_list'] = args[:-1] args[:-1] = [] else: # just a plain arg argname = ap if not args: raise errors.BzrCommandError( gettext("command {0!r} requires argument {1}").format( cmd, argname.upper())) else: argdict[argname] = args.pop(0) if args: raise errors.BzrCommandError( gettext( "extra argument to command {0}: {1}").format( cmd, args[0]) ) return argdict def apply_coveraged(dirname, the_callable, *args, **kwargs): # Cannot use "import trace", as that would import bzrlib.trace instead of # the standard library's trace. trace = __import__('trace') tracer = trace.Trace(count=1, trace=0) sys.settrace(tracer.globaltrace) threading.settrace(tracer.globaltrace) try: return exception_to_return_code(the_callable, *args, **kwargs) finally: sys.settrace(None) results = tracer.results() results.write_results(show_missing=1, summary=False, coverdir=dirname) def apply_profiled(the_callable, *args, **kwargs): import hotshot import tempfile import hotshot.stats pffileno, pfname = tempfile.mkstemp() try: prof = hotshot.Profile(pfname) try: ret = prof.runcall(exception_to_return_code, the_callable, *args, **kwargs) or 0 finally: prof.close() stats = hotshot.stats.load(pfname) stats.strip_dirs() stats.sort_stats('cum') # 'time' ## XXX: Might like to write to stderr or the trace file instead but ## print_stats seems hardcoded to stdout stats.print_stats(20) return ret finally: os.close(pffileno) os.remove(pfname) def exception_to_return_code(the_callable, *args, **kwargs): """UI level helper for profiling and coverage. This transforms exceptions into a return value of 3. As such its only relevant to the UI layer, and should never be called where catching exceptions may be desirable. 
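# Illustrative sketch (not part of the original module): _match_argform()
# above interprets the suffix of each takes_args entry -- '?' marks an
# optional argument, '*' collects whatever remains into NAME_list, '+'
# requires at least one value, and a bare name is a single required argument.
from bzrlib.commands import _match_argform

# For a command declared with takes_args = ['location?', 'file*']:
_example_args = _match_argform('example', ['location?', 'file*'],
                               ['lp:foo', 'a.txt', 'b.txt'])
# _example_args == {'location': 'lp:foo', 'file_list': ['a.txt', 'b.txt']}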
""" try: return the_callable(*args, **kwargs) except (KeyboardInterrupt, Exception), e: # used to handle AssertionError and KeyboardInterrupt # specially here, but hopefully they're handled ok by the logger now exc_info = sys.exc_info() exitcode = trace.report_exception(exc_info, sys.stderr) if os.environ.get('BZR_PDB'): print '**** entering debugger' import pdb pdb.post_mortem(exc_info[2]) return exitcode def apply_lsprofiled(filename, the_callable, *args, **kwargs): from bzrlib.lsprof import profile ret, stats = profile(exception_to_return_code, the_callable, *args, **kwargs) stats.sort() if filename is None: stats.pprint() else: stats.save(filename) trace.note(gettext('Profile data written to "%s".'), filename) return ret def get_alias(cmd, config=None): """Return an expanded alias, or None if no alias exists. cmd Command to be checked for an alias. config Used to specify an alternative config to use, which is especially useful for testing. If it is unspecified, the global config will be used. """ if config is None: import bzrlib.config config = bzrlib.config.GlobalConfig() alias = config.get_alias(cmd) if (alias): return cmdline.split(alias) return None def run_bzr(argv, load_plugins=load_plugins, disable_plugins=disable_plugins): """Execute a command. :param argv: The command-line arguments, without the program name from argv[0] These should already be decoded. All library/test code calling run_bzr should be passing valid strings (don't need decoding). :param load_plugins: What function to call when triggering plugin loading. This function should take no arguments and cause all plugins to be loaded. :param disable_plugins: What function to call when disabling plugin loading. This function should take no arguments and cause all plugin loading to be prohibited (so that code paths in your application that know about some plugins possibly being present will fail to import those plugins even if they are installed.) :return: Returns a command exit code or raises an exception. Special master options: these must come before the command because they control how the command is interpreted. --no-plugins Do not load plugin modules at all --no-aliases Do not allow aliases --builtin Only use builtin commands. (Plugins are still allowed to change other behaviour.) --profile Run under the Python hotshot profiler. --lsprof Run under the Python lsprof profiler. --coverage Generate line coverage report in the specified directory. --concurrency Specify the number of processes that can be run concurrently (selftest). """ trace.mutter("bazaar version: " + bzrlib.__version__) argv = _specified_or_unicode_argv(argv) trace.mutter("bzr arguments: %r", argv) opt_lsprof = opt_profile = opt_no_plugins = opt_builtin = \ opt_no_l10n = opt_no_aliases = False opt_lsprof_file = opt_coverage_dir = None # --no-plugins is handled specially at a very early stage. We need # to load plugins before doing other command parsing so that they # can override commands, but this needs to happen first. 
argv_copy = [] i = 0 override_config = [] while i < len(argv): a = argv[i] if a == '--profile': opt_profile = True elif a == '--lsprof': opt_lsprof = True elif a == '--lsprof-file': opt_lsprof = True opt_lsprof_file = argv[i + 1] i += 1 elif a == '--no-plugins': opt_no_plugins = True elif a == '--no-aliases': opt_no_aliases = True elif a == '--no-l10n': opt_no_l10n = True elif a == '--builtin': opt_builtin = True elif a == '--concurrency': os.environ['BZR_CONCURRENCY'] = argv[i + 1] i += 1 elif a == '--coverage': opt_coverage_dir = argv[i + 1] i += 1 elif a == '--profile-imports': pass # already handled in startup script Bug #588277 elif a.startswith('-D'): debug.debug_flags.add(a[2:]) elif a.startswith('-O'): override_config.append(a[2:]) else: argv_copy.append(a) i += 1 if bzrlib.global_state is None: # FIXME: Workaround for users that imported bzrlib but didn't call # bzrlib.initialize -- vila 2012-01-19 cmdline_overrides = config.CommandLineStore() else: cmdline_overrides = bzrlib.global_state.cmdline_overrides cmdline_overrides._from_cmdline(override_config) debug.set_debug_flags_from_config() if not opt_no_plugins: load_plugins() else: disable_plugins() argv = argv_copy if (not argv): get_cmd_object('help').run_argv_aliases([]) return 0 if argv[0] == '--version': get_cmd_object('version').run_argv_aliases([]) return 0 alias_argv = None if not opt_no_aliases: alias_argv = get_alias(argv[0]) if alias_argv: argv[0] = alias_argv.pop(0) cmd = argv.pop(0) cmd_obj = get_cmd_object(cmd, plugins_override=not opt_builtin) if opt_no_l10n: cmd.l10n = False run = cmd_obj.run_argv_aliases run_argv = [argv, alias_argv] try: # We can be called recursively (tests for example), but we don't want # the verbosity level to propagate. saved_verbosity_level = option._verbosity_level option._verbosity_level = 0 if opt_lsprof: if opt_coverage_dir: trace.warning( '--coverage ignored, because --lsprof is in use.') ret = apply_lsprofiled(opt_lsprof_file, run, *run_argv) elif opt_profile: if opt_coverage_dir: trace.warning( '--coverage ignored, because --profile is in use.') ret = apply_profiled(run, *run_argv) elif opt_coverage_dir: ret = apply_coveraged(opt_coverage_dir, run, *run_argv) else: ret = run(*run_argv) return ret or 0 finally: # reset, in case we may do other commands later within the same # process. Commands that want to execute sub-commands must propagate # --verbose in their own way. 
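# Illustrative sketch (values made up): the argv scan above peels '-Dxxx' off
# into debug.debug_flags and '-Oname=value' off into the per-invocation
# configuration overrides before ordinary option parsing runs.  '-Dmemory'
# matches the debug flag checked a few lines further down.
_example_argv = [u'-Dmemory', u'-Oemail=Jane Doe <jane@example.com>', u'commit']
_example_debug_flags = set(a[2:] for a in _example_argv if a.startswith('-D'))
_example_overrides = [a[2:] for a in _example_argv if a.startswith('-O')]
# _example_debug_flags == set(['memory'])
# _example_overrides == [u'email=Jane Doe <jane@example.com>']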
if 'memory' in debug.debug_flags: trace.debug_memory('Process status after command:', short=False) option._verbosity_level = saved_verbosity_level # Reset the overrides cmdline_overrides._reset() def display_command(func): """Decorator that suppresses pipe/interrupt errors.""" def ignore_pipe(*args, **kwargs): try: result = func(*args, **kwargs) sys.stdout.flush() return result except IOError, e: if getattr(e, 'errno', None) is None: raise if e.errno != errno.EPIPE: # Win32 raises IOError with errno=0 on a broken pipe if sys.platform != 'win32' or (e.errno not in (0, errno.EINVAL)): raise pass except KeyboardInterrupt: pass return ignore_pipe def install_bzr_command_hooks(): """Install the hooks to supply bzr's own commands.""" if _list_bzr_commands in Command.hooks["list_commands"]: return Command.hooks.install_named_hook("list_commands", _list_bzr_commands, "bzr commands") Command.hooks.install_named_hook("get_command", _get_bzr_command, "bzr commands") Command.hooks.install_named_hook("get_command", _get_plugin_command, "bzr plugin commands") Command.hooks.install_named_hook("get_command", _get_external_command, "bzr external command lookup") Command.hooks.install_named_hook("get_missing_command", _try_plugin_provider, "bzr plugin-provider-db check") def _specified_or_unicode_argv(argv): # For internal or testing use, argv can be passed. Otherwise, get it from # the process arguments in a unicode-safe way. if argv is None: return osutils.get_unicode_argv() else: new_argv = [] try: # ensure all arguments are unicode strings for a in argv: if isinstance(a, unicode): new_argv.append(a) else: new_argv.append(a.decode('ascii')) except UnicodeDecodeError: raise errors.BzrError("argv should be list of unicode strings.") return new_argv def main(argv=None): """Main entry point of command-line interface. Typically `bzrlib.initialize` should be called first. :param argv: list of unicode command-line arguments similar to sys.argv. argv[0] is script name usually, it will be ignored. Don't pass here sys.argv because this list contains plain strings and not unicode; pass None instead. :return: exit code of bzr command. """ if argv is not None: argv = argv[1:] _register_builtin_commands() ret = run_bzr_catch_errors(argv) trace.mutter("return code %d", ret) return ret def run_bzr_catch_errors(argv): """Run a bzr command with parameters as described by argv. This function assumed that that UI layer is setup, that symbol deprecations are already applied, and that unicode decoding has already been performed on argv. """ # done here so that they're covered for every test run install_bzr_command_hooks() return exception_to_return_code(run_bzr, argv) def run_bzr_catch_user_errors(argv): """Run bzr and report user errors, but let internal errors propagate. This is used for the test suite, and might be useful for other programs that want to wrap the commandline interface. """ # done here so that they're covered for every test run install_bzr_command_hooks() try: return run_bzr(argv) except Exception, e: if (isinstance(e, (OSError, IOError)) or not getattr(e, 'internal_error', True)): trace.report_exception(sys.exc_info(), sys.stderr) return 3 else: raise class HelpCommandIndex(object): """A index for bzr help that returns commands.""" def __init__(self): self.prefix = 'commands/' def get_topics(self, topic): """Search for topic amongst commands. :param topic: A topic to search for. :return: A list which is either empty or contains a single Command entry. 
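# Illustrative sketch (hypothetical function, not part of bzrlib): the
# display_command decorator above is intended for code that writes a lot to
# stdout -- it swallows the broken-pipe error raised by e.g. "bzr log | head"
# as well as KeyboardInterrupt.
import sys
from bzrlib.commands import display_command

@display_command
def _print_many_lines():
    for i in range(100000):
        sys.stdout.write('%d\n' % i)

# _print_many_lines() now returns quietly instead of raising IOError(EPIPE)
# when the reading end of the pipe goes away early.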
""" if topic and topic.startswith(self.prefix): topic = topic[len(self.prefix):] try: cmd = _get_cmd_object(topic, check_missing=False) except KeyError: return [] else: return [cmd] class Provider(object): """Generic class to be overriden by plugins""" def plugin_for_command(self, cmd_name): """Takes a command and returns the information for that plugin :return: A dictionary with all the available information for the requested plugin """ raise NotImplementedError class ProvidersRegistry(registry.Registry): """This registry exists to allow other providers to exist""" def __iter__(self): for key, provider in self.iteritems(): yield provider command_providers_registry = ProvidersRegistry() bzr-2.7.0/bzrlib/commit.py0000644000000000000000000012723011717502351013600 0ustar 00000000000000# Copyright (C) 2005-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import # The newly committed revision is going to have a shape corresponding # to that of the working tree. Files that are not in the # working tree and that were in the predecessor are reported as # removed --- this can include files that were either removed from the # inventory or deleted in the working tree. If they were only # deleted from disk, they are removed from the working inventory. # We then consider the remaining entries, which will be in the new # version. Directory entries are simply copied across. File entries # must be checked to see if a new version of the file should be # recorded. For each parent revision tree, we check to see what # version of the file was present. If the file was present in at # least one tree, and if it was the same version in all the trees, # then we can just refer to that version. Otherwise, a new version # representing the merger of the file versions must be added. # TODO: Update hashcache before and after - or does the WorkingTree # look after that? # TODO: Rather than mashing together the ancestry and storing it back, # perhaps the weave should have single method which does it all in one # go, avoiding a lot of redundant work. # TODO: Perhaps give a warning if one of the revisions marked as # merged is already in the ancestry, and then don't record it as a # distinct parent. # TODO: If the file is newly merged but unchanged from the version it # merges from, then it should still be reported as newly added # relative to the basis revision. # TODO: Change the parameter 'rev_id' to 'revision_id' to be consistent with # the rest of the code; add a deprecation of the old name. 
from bzrlib import ( debug, errors, trace, tree, ui, ) from bzrlib.branch import Branch from bzrlib.cleanup import OperationWithCleanups import bzrlib.config from bzrlib.errors import (BzrError, PointlessCommit, ConflictsInTree, StrictCommitFailed ) from bzrlib.osutils import (get_user_encoding, is_inside_any, minimum_path_selection, splitpath, ) from bzrlib.trace import mutter, note, is_quiet from bzrlib.inventory import Inventory, InventoryEntry, make_entry from bzrlib import symbol_versioning from bzrlib.urlutils import unescape_for_display from bzrlib.i18n import gettext class NullCommitReporter(object): """I report on progress of a commit.""" def started(self, revno, revid, location=None): if location is None: symbol_versioning.warn("As of bzr 1.0 you must pass a location " "to started.", DeprecationWarning, stacklevel=2) pass def snapshot_change(self, change, path): pass def completed(self, revno, rev_id): pass def deleted(self, path): pass def missing(self, path): pass def renamed(self, change, old_path, new_path): pass def is_verbose(self): return False class ReportCommitToLog(NullCommitReporter): def _note(self, format, *args): """Output a message. Subclasses may choose to override this method. """ note(format, *args) def snapshot_change(self, change, path): if path == '' and change in (gettext('added'), gettext('modified')): return self._note("%s %s", change, path) def started(self, revno, rev_id, location=None): if location is not None: location = ' to: ' + unescape_for_display(location, 'utf-8') else: # When started was added, location was only made optional by # accident. Matt Nordhoff 20071129 symbol_versioning.warn("As of bzr 1.0 you must pass a location " "to started.", DeprecationWarning, stacklevel=2) location = '' self._note(gettext('Committing%s'), location) def completed(self, revno, rev_id): self._note(gettext('Committed revision %d.'), revno) # self._note goes to the console too; so while we want to log the # rev_id, we can't trivially only log it. (See bug 526425). Long # term we should rearrange the reporting structure, but for now # we just mutter seperately. We mutter the revid and revno together # so that concurrent bzr invocations won't lead to confusion. mutter('Committed revid %s as revno %d.', rev_id, revno) def deleted(self, path): self._note(gettext('deleted %s'), path) def missing(self, path): self._note(gettext('missing %s'), path) def renamed(self, change, old_path, new_path): self._note('%s %s => %s', change, old_path, new_path) def is_verbose(self): return True class Commit(object): """Task of committing a new revision. This is a MethodObject: it accumulates state as the commit is prepared, and then it is discarded. It doesn't represent historical revisions, just the act of recording a new one. missing_ids Modified to hold a list of files that have been deleted from the working directory; these should be removed from the working inventory. """ def __init__(self, reporter=None, config_stack=None): """Create a Commit object. 
:param reporter: the default reporter to use or None to decide later """ self.reporter = reporter self.config_stack = config_stack @staticmethod def update_revprops(revprops, branch, authors=None, author=None, local=False, possible_master_transports=None): if revprops is None: revprops = {} if possible_master_transports is None: possible_master_transports = [] if not 'branch-nick' in revprops: revprops['branch-nick'] = branch._get_nick( local, possible_master_transports) if authors is not None: if author is not None: raise AssertionError('Specifying both author and authors ' 'is not allowed. Specify just authors instead') if 'author' in revprops or 'authors' in revprops: # XXX: maybe we should just accept one of them? raise AssertionError('author property given twice') if authors: for individual in authors: if '\n' in individual: raise AssertionError('\\n is not a valid character ' 'in an author identity') revprops['authors'] = '\n'.join(authors) if author is not None: symbol_versioning.warn('The parameter author was deprecated' ' in version 1.13. Use authors instead', DeprecationWarning) if 'author' in revprops or 'authors' in revprops: # XXX: maybe we should just accept one of them? raise AssertionError('author property given twice') if '\n' in author: raise AssertionError('\\n is not a valid character ' 'in an author identity') revprops['authors'] = author return revprops def commit(self, message=None, timestamp=None, timezone=None, committer=None, specific_files=None, rev_id=None, allow_pointless=True, strict=False, verbose=False, revprops=None, working_tree=None, local=False, reporter=None, config=None, message_callback=None, recursive='down', exclude=None, possible_master_transports=None, lossy=False): """Commit working copy as a new revision. :param message: the commit message (it or message_callback is required) :param message_callback: A callback: message = message_callback(cmt_obj) :param timestamp: if not None, seconds-since-epoch for a postdated/predated commit. :param specific_files: If not None, commit only those files. An empty list means 'commit no files'. :param rev_id: If set, use this as the new revision id. Useful for test or import commands that need to tightly control what revisions are assigned. If you duplicate a revision id that exists elsewhere it is your own fault. If null (default), a time/random revision id is generated. :param allow_pointless: If true (default), commit even if nothing has changed and no merges are recorded. :param strict: If true, don't allow a commit if the working tree contains unknown files. :param revprops: Properties for new revision :param local: Perform a local only commit. :param reporter: the reporter to use or None for the default :param verbose: if True and the reporter is not None, report everything :param recursive: If set to 'down', commit in any subtrees that have pending changes of any sort during this commit. :param exclude: None or a list of relative paths to exclude from the commit. Pending changes to excluded files will be ignored by the commit. :param lossy: When committing to a foreign VCS, ignore any data that can not be natively represented. """ operation = OperationWithCleanups(self._commit) self.revprops = revprops or {} # XXX: Can be set on __init__ or passed in - this is a bit ugly. 
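# Illustrative sketch (author identity made up): update_revprops() above only
# fills in what is missing, so pre-seeding 'branch-nick' lets it be exercised
# without a real Branch object.
_example_revprops = Commit.update_revprops(
    {'branch-nick': 'demo'}, None,
    authors=['Jane Doe <jane@example.com>'])
# _example_revprops == {'branch-nick': 'demo',
#                       'authors': 'Jane Doe <jane@example.com>'}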
self.config_stack = config or self.config_stack return operation.run( message=message, timestamp=timestamp, timezone=timezone, committer=committer, specific_files=specific_files, rev_id=rev_id, allow_pointless=allow_pointless, strict=strict, verbose=verbose, working_tree=working_tree, local=local, reporter=reporter, message_callback=message_callback, recursive=recursive, exclude=exclude, possible_master_transports=possible_master_transports, lossy=lossy) def _commit(self, operation, message, timestamp, timezone, committer, specific_files, rev_id, allow_pointless, strict, verbose, working_tree, local, reporter, message_callback, recursive, exclude, possible_master_transports, lossy): mutter('preparing to commit') if working_tree is None: raise BzrError("working_tree must be passed into commit().") else: self.work_tree = working_tree self.branch = self.work_tree.branch if getattr(self.work_tree, 'requires_rich_root', lambda: False)(): if not self.branch.repository.supports_rich_root(): raise errors.RootNotRich() if message_callback is None: if message is not None: if isinstance(message, str): message = message.decode(get_user_encoding()) message_callback = lambda x: message else: raise BzrError("The message or message_callback keyword" " parameter is required for commit().") self.bound_branch = None self.any_entries_deleted = False if exclude is not None: self.exclude = sorted( minimum_path_selection(exclude)) else: self.exclude = [] self.local = local self.master_branch = None self.recursive = recursive self.rev_id = None # self.specific_files is None to indicate no filter, or any iterable to # indicate a filter - [] means no files at all, as per iter_changes. if specific_files is not None: self.specific_files = sorted( minimum_path_selection(specific_files)) else: self.specific_files = None self.allow_pointless = allow_pointless self.message_callback = message_callback self.timestamp = timestamp self.timezone = timezone self.committer = committer self.strict = strict self.verbose = verbose self.work_tree.lock_write() operation.add_cleanup(self.work_tree.unlock) self.parents = self.work_tree.get_parent_ids() # We can use record_iter_changes IFF iter_changes is compatible with # the command line parameters, and the repository has fast delta # generation. See bug 347649. self.use_record_iter_changes = ( not self.exclude and not self.branch.repository._format.supports_tree_reference and (self.branch.repository._format.fast_deltas or len(self.parents) < 2)) self.pb = ui.ui_factory.nested_progress_bar() operation.add_cleanup(self.pb.finished) self.basis_revid = self.work_tree.last_revision() self.basis_tree = self.work_tree.basis_tree() self.basis_tree.lock_read() operation.add_cleanup(self.basis_tree.unlock) # Cannot commit with conflicts present. if len(self.work_tree.conflicts()) > 0: raise ConflictsInTree # Setup the bound branch variables as needed. self._check_bound_branch(operation, possible_master_transports) # Check that the working tree is up to date old_revno, old_revid, new_revno = self._check_out_of_date_tree() # Complete configuration setup if reporter is not None: self.reporter = reporter elif self.reporter is None: self.reporter = self._select_reporter() if self.config_stack is None: self.config_stack = self.work_tree.get_config_stack() self._set_specific_file_ids() # Setup the progress bar. As the number of files that need to be # committed in unknown, progress is reported as stages. 
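# Illustrative sketch (hypothetical callables, not part of bzrlib): the
# OperationWithCleanups pattern used above runs a callable and guarantees the
# registered cleanups fire afterwards, even on error.  run() passes the
# operation itself as the first argument (which is how _commit() above
# receives it); run_simple() does not.
from bzrlib.cleanup import OperationWithCleanups

def _demo_work(operation, greeting):
    operation.add_cleanup(lambda: None)   # stand-in for e.g. tree.unlock
    return '%s, world' % greeting

_demo_op = OperationWithCleanups(_demo_work)
_demo_result = _demo_op.run('hello')      # -> 'hello, world'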
# We keep track of entries separately though and include that # information in the progress bar during the relevant stages. self.pb_stage_name = "" self.pb_stage_count = 0 self.pb_stage_total = 5 if self.bound_branch: # 2 extra stages: "Uploading data to master branch" and "Merging # tags to master branch" self.pb_stage_total += 2 self.pb.show_pct = False self.pb.show_spinner = False self.pb.show_eta = False self.pb.show_count = True self.pb.show_bar = True self._gather_parents() # After a merge, a selected file commit is not supported. # See 'bzr help merge' for an explanation as to why. if len(self.parents) > 1 and self.specific_files is not None: raise errors.CannotCommitSelectedFileMerge(self.specific_files) # Excludes are a form of selected file commit. if len(self.parents) > 1 and self.exclude: raise errors.CannotCommitSelectedFileMerge(self.exclude) # Collect the changes self._set_progress_stage("Collecting changes", counter=True) self._lossy = lossy self.builder = self.branch.get_commit_builder(self.parents, self.config_stack, timestamp, timezone, committer, self.revprops, rev_id, lossy=lossy) if not self.builder.supports_record_entry_contents and self.exclude: self.builder.abort() raise errors.ExcludesUnsupported(self.branch.repository) if self.builder.updates_branch and self.bound_branch: self.builder.abort() raise AssertionError( "bound branches not supported for commit builders " "that update the branch") try: self.builder.will_record_deletes() # find the location being committed to if self.bound_branch: master_location = self.master_branch.base else: master_location = self.branch.base # report the start of the commit self.reporter.started(new_revno, self.rev_id, master_location) self._update_builder_with_changes() self._check_pointless() # TODO: Now the new inventory is known, check for conflicts. # ADHB 2006-08-08: If this is done, populate_new_inv should not add # weave lines, because nothing should be recorded until it is known # that commit will succeed. self._set_progress_stage("Saving data locally") self.builder.finish_inventory() # Prompt the user for a commit message if none provided message = message_callback(self) self.message = message # Add revision data to the local branch self.rev_id = self.builder.commit(self.message) except Exception, e: mutter("aborting commit write group because of exception:") trace.log_exception_quietly() self.builder.abort() raise self._update_branches(old_revno, old_revid, new_revno) # Make the working tree be up to date with the branch. This # includes automatic changes scheduled to be made to the tree, such # as updating its basis and unversioning paths that were missing. self.work_tree.unversion(self.deleted_ids) self._set_progress_stage("Updating the working tree") self.work_tree.update_basis_by_delta(self.rev_id, self.builder.get_basis_delta()) self.reporter.completed(new_revno, self.rev_id) self._process_post_hooks(old_revno, new_revno) return self.rev_id def _update_branches(self, old_revno, old_revid, new_revno): """Update the master and local branch to the new revision. This will try to make sure that the master branch is updated before the local branch. :param old_revno: Revision number of master branch before the commit :param old_revid: Tip of master branch before the commit :param new_revno: Revision number of the new commit """ if not self.builder.updates_branch: self._process_pre_hooks(old_revno, new_revno) # Upload revision data to the master. # this will propagate merged revisions too if needed. 
if self.bound_branch: self._set_progress_stage("Uploading data to master branch") # 'commit' to the master first so a timeout here causes the # local branch to be out of date (new_revno, self.rev_id) = self.master_branch.import_last_revision_info_and_tags( self.branch, new_revno, self.rev_id, lossy=self._lossy) if self._lossy: self.branch.fetch(self.master_branch, self.rev_id) # and now do the commit locally. self.branch.set_last_revision_info(new_revno, self.rev_id) else: try: self._process_pre_hooks(old_revno, new_revno) except: # The commit builder will already have updated the branch, # revert it. self.branch.set_last_revision_info(old_revno, old_revid) raise # Merge local tags to remote if self.bound_branch: self._set_progress_stage("Merging tags to master branch") tag_updates, tag_conflicts = self.branch.tags.merge_to( self.master_branch.tags) if tag_conflicts: warning_lines = [' ' + name for name, _, _ in tag_conflicts] note( gettext("Conflicting tags in bound branch:\n{0}".format( "\n".join(warning_lines))) ) def _select_reporter(self): """Select the CommitReporter to use.""" if is_quiet(): return NullCommitReporter() return ReportCommitToLog() def _check_pointless(self): if self.allow_pointless: return # A merge with no effect on files if len(self.parents) > 1: return if self.builder.any_changes(): return raise PointlessCommit() def _check_bound_branch(self, operation, possible_master_transports=None): """Check to see if the local branch is bound. If it is bound, then most of the commit will actually be done using the remote branch as the target branch. Only at the end will the local branch be updated. """ if self.local and not self.branch.get_bound_location(): raise errors.LocalRequiresBoundBranch() if not self.local: self.master_branch = self.branch.get_master_branch( possible_master_transports) if not self.master_branch: # make this branch the reference branch for out of date checks. self.master_branch = self.branch return # If the master branch is bound, we must fail master_bound_location = self.master_branch.get_bound_location() if master_bound_location: raise errors.CommitToDoubleBoundBranch(self.branch, self.master_branch, master_bound_location) # TODO: jam 20051230 We could automatically push local # commits to the remote branch if they would fit. # But for now, just require remote to be identical # to local. # Make sure the local branch is identical to the master master_info = self.master_branch.last_revision_info() local_info = self.branch.last_revision_info() if local_info != master_info: raise errors.BoundBranchOutOfDate(self.branch, self.master_branch) # Now things are ready to change the master branch # so grab the lock self.bound_branch = self.branch self.master_branch.lock_write() operation.add_cleanup(self.master_branch.unlock) def _check_out_of_date_tree(self): """Check that the working tree is up to date. :return: old_revision_number, old_revision_id, new_revision_number tuple """ try: first_tree_parent = self.work_tree.get_parent_ids()[0] except IndexError: # if there are no parents, treat our parent as 'None' # this is so that we still consider the master branch # - in a checkout scenario the tree may have no # parents but the branch may do. 
first_tree_parent = bzrlib.revision.NULL_REVISION old_revno, master_last = self.master_branch.last_revision_info() if master_last != first_tree_parent: if master_last != bzrlib.revision.NULL_REVISION: raise errors.OutOfDateTree(self.work_tree) if self.branch.repository.has_revision(first_tree_parent): new_revno = old_revno + 1 else: # ghost parents never appear in revision history. new_revno = 1 return old_revno, master_last, new_revno def _process_pre_hooks(self, old_revno, new_revno): """Process any registered pre commit hooks.""" self._set_progress_stage("Running pre_commit hooks") self._process_hooks("pre_commit", old_revno, new_revno) def _process_post_hooks(self, old_revno, new_revno): """Process any registered post commit hooks.""" # Process the post commit hooks, if any self._set_progress_stage("Running post_commit hooks") # old style commit hooks - should be deprecated ? (obsoleted in # 0.15^H^H^H^H 2.5.0) post_commit = self.config_stack.get('post_commit') if post_commit is not None: hooks = post_commit.split(' ') # this would be nicer with twisted.python.reflect.namedAny for hook in hooks: result = eval(hook + '(branch, rev_id)', {'branch':self.branch, 'bzrlib':bzrlib, 'rev_id':self.rev_id}) # process new style post commit hooks self._process_hooks("post_commit", old_revno, new_revno) def _process_hooks(self, hook_name, old_revno, new_revno): if not Branch.hooks[hook_name]: return # new style commit hooks: if not self.bound_branch: hook_master = self.branch hook_local = None else: hook_master = self.master_branch hook_local = self.branch # With bound branches, when the master is behind the local branch, # the 'old_revno' and old_revid values here are incorrect. # XXX: FIXME ^. RBC 20060206 if self.parents: old_revid = self.parents[0] else: old_revid = bzrlib.revision.NULL_REVISION if hook_name == "pre_commit": future_tree = self.builder.revision_tree() tree_delta = future_tree.changes_from(self.basis_tree, include_root=True) for hook in Branch.hooks[hook_name]: # show the running hook in the progress bar. As hooks may # end up doing nothing (e.g. because they are not configured by # the user) this is still showing progress, not showing overall # actions - its up to each plugin to show a UI if it want's to # (such as 'Emailing diff to foo@example.com'). self.pb_stage_name = "Running %s hooks [%s]" % \ (hook_name, Branch.hooks.get_hook_name(hook)) self._emit_progress() if 'hooks' in debug.debug_flags: mutter("Invoking commit hook: %r", hook) if hook_name == "post_commit": hook(hook_local, hook_master, old_revno, old_revid, new_revno, self.rev_id) elif hook_name == "pre_commit": hook(hook_local, hook_master, old_revno, old_revid, new_revno, self.rev_id, tree_delta, future_tree) def _gather_parents(self): """Record the parents of a merge for merge detection.""" # TODO: Make sure that this list doesn't contain duplicate # entries and the order is preserved when doing this. if self.use_record_iter_changes: return self.basis_inv = self.basis_tree.root_inventory self.parent_invs = [self.basis_inv] for revision in self.parents[1:]: if self.branch.repository.has_revision(revision): mutter('commit parent revision {%s}', revision) inventory = self.branch.repository.get_inventory(revision) self.parent_invs.append(inventory) else: mutter('commit parent ghost revision {%s}', revision) def _update_builder_with_changes(self): """Update the commit builder with the data about what has changed. 
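# Illustrative sketch (hypothetical plugin and function names): the old-style
# 'post_commit' configuration option handled above names one or more
# space-separated functions, each evaluated as hook(branch, rev_id).  Because
# the eval namespace only contains 'branch', 'rev_id' and the 'bzrlib'
# package, such hooks traditionally live under bzrlib.plugins, e.g. in
# bazaar.conf:
#
#     [DEFAULT]
#     post_commit = bzrlib.plugins.myplugin.announce_commit
#
# with the referenced function shaped roughly like:
def announce_commit(branch, rev_id):
    # 'branch' is the Branch committed to, 'rev_id' the new revision id.
    from bzrlib import trace
    trace.note('committed %s to %s', rev_id, branch.base)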
""" exclude = self.exclude specific_files = self.specific_files mutter("Selecting files for commit with filter %s", specific_files) self._check_strict() if self.use_record_iter_changes: iter_changes = self.work_tree.iter_changes(self.basis_tree, specific_files=specific_files) iter_changes = self._filter_iter_changes(iter_changes) for file_id, path, fs_hash in self.builder.record_iter_changes( self.work_tree, self.basis_revid, iter_changes): self.work_tree._observed_sha1(file_id, path, fs_hash) else: # Build the new inventory self._populate_from_inventory() self._record_unselected() self._report_and_accumulate_deletes() def _filter_iter_changes(self, iter_changes): """Process iter_changes. This method reports on the changes in iter_changes to the user, and converts 'missing' entries in the iter_changes iterator to 'deleted' entries. 'missing' entries have their :param iter_changes: An iter_changes to process. :return: A generator of changes. """ reporter = self.reporter report_changes = reporter.is_verbose() deleted_ids = [] for change in iter_changes: if report_changes: old_path = change[1][0] new_path = change[1][1] versioned = change[3][1] kind = change[6][1] versioned = change[3][1] if kind is None and versioned: # 'missing' path if report_changes: reporter.missing(new_path) deleted_ids.append(change[0]) # Reset the new path (None) and new versioned flag (False) change = (change[0], (change[1][0], None), change[2], (change[3][0], False)) + change[4:] new_path = change[1][1] versioned = False elif kind == 'tree-reference': if self.recursive == 'down': self._commit_nested_tree(change[0], change[1][1]) if change[3][0] or change[3][1]: yield change if report_changes: if new_path is None: reporter.deleted(old_path) elif old_path is None: reporter.snapshot_change(gettext('added'), new_path) elif old_path != new_path: reporter.renamed(gettext('renamed'), old_path, new_path) else: if (new_path or self.work_tree.branch.repository._format.rich_root_data): # Don't report on changes to '' in non rich root # repositories. reporter.snapshot_change(gettext('modified'), new_path) self._next_progress_entry() # Unversion IDs that were found to be deleted self.deleted_ids = deleted_ids def _record_unselected(self): # If specific files are selected, then all un-selected files must be # recorded in their previous state. For more details, see # https://lists.ubuntu.com/archives/bazaar/2007q3/028476.html. if self.specific_files or self.exclude: specific_files = self.specific_files or [] for path, old_ie in self.basis_inv.iter_entries(): if self.builder.new_inventory.has_id(old_ie.file_id): # already added - skip. continue if (is_inside_any(specific_files, path) and not is_inside_any(self.exclude, path)): # was inside the selected path, and not excluded - if not # present it has been deleted so skip. continue # From here down it was either not selected, or was excluded: # We preserve the entry unaltered. ie = old_ie.copy() # Note: specific file commits after a merge are currently # prohibited. This test is for sanity/safety in case it's # required after that changes. 
if len(self.parents) > 1: ie.revision = None self.builder.record_entry_contents(ie, self.parent_invs, path, self.basis_tree, None) def _report_and_accumulate_deletes(self): if (isinstance(self.basis_inv, Inventory) and isinstance(self.builder.new_inventory, Inventory)): # the older Inventory classes provide a _byid dict, and building a # set from the keys of this dict is substantially faster than even # getting a set of ids from the inventory # # set(dict) is roughly the same speed as # set(iter(dict)) and both are significantly slower than # set(dict.keys()) deleted_ids = set(self.basis_inv._byid.keys()) - \ set(self.builder.new_inventory._byid.keys()) else: deleted_ids = set(self.basis_inv) - set(self.builder.new_inventory) if deleted_ids: self.any_entries_deleted = True deleted = [(self.basis_tree.id2path(file_id), file_id) for file_id in deleted_ids] deleted.sort() # XXX: this is not quite directory-order sorting for path, file_id in deleted: self.builder.record_delete(path, file_id) self.reporter.deleted(path) def _check_strict(self): # XXX: when we use iter_changes this would likely be faster if # iter_changes would check for us (even in the presence of # selected_files). if self.strict: # raise an exception as soon as we find a single unknown. for unknown in self.work_tree.unknowns(): raise StrictCommitFailed() def _populate_from_inventory(self): """Populate the CommitBuilder by walking the working tree inventory.""" # Build the revision inventory. # # This starts by creating a new empty inventory. Depending on # which files are selected for commit, and what is present in the # current tree, the new inventory is populated. inventory entries # which are candidates for modification have their revision set to # None; inventory entries that are carried over untouched have their # revision set to their prior value. # # ESEPARATIONOFCONCERNS: this function is diffing and using the diff # results to create a new inventory at the same time, which results # in bugs like #46635. Any reason not to use/enhance Tree.changes_from? # ADHB 11-07-2006 specific_files = self.specific_files exclude = self.exclude report_changes = self.reporter.is_verbose() deleted_ids = [] # A tree of paths that have been deleted. E.g. if foo/bar has been # deleted, then we have {'foo':{'bar':{}}} deleted_paths = {} # XXX: Note that entries may have the wrong kind because the entry does # not reflect the status on disk. # NB: entries will include entries within the excluded ids/paths # because iter_entries_by_dir has no 'exclude' facility today. entries = self.work_tree.iter_entries_by_dir( specific_file_ids=self.specific_file_ids, yield_parents=True) for path, existing_ie in entries: file_id = existing_ie.file_id name = existing_ie.name parent_id = existing_ie.parent_id kind = existing_ie.kind # Skip files that have been deleted from the working tree. # The deleted path ids are also recorded so they can be explicitly # unversioned later. if deleted_paths: path_segments = splitpath(path) deleted_dict = deleted_paths for segment in path_segments: deleted_dict = deleted_dict.get(segment, None) if not deleted_dict: # We either took a path not present in the dict # (deleted_dict was None), or we've reached an empty # child dir in the dict, so are now a sub-path. break else: deleted_dict = None if deleted_dict is not None: # the path has a deleted parent, do not add it. continue if exclude and is_inside_any(exclude, path): # Skip excluded paths. Excluded paths are processed by # _update_builder_with_changes. 
continue content_summary = self.work_tree.path_content_summary(path) kind = content_summary[0] # Note that when a filter of specific files is given, we must only # skip/record deleted files matching that filter. if not specific_files or is_inside_any(specific_files, path): if kind == 'missing': if not deleted_paths: # path won't have been split yet. path_segments = splitpath(path) deleted_dict = deleted_paths for segment in path_segments: deleted_dict = deleted_dict.setdefault(segment, {}) self.reporter.missing(path) self._next_progress_entry() deleted_ids.append(file_id) continue # TODO: have the builder do the nested commit just-in-time IF and # only if needed. if kind == 'tree-reference': # enforce repository nested tree policy. if (not self.work_tree.supports_tree_reference() or # repository does not support it either. not self.branch.repository._format.supports_tree_reference): kind = 'directory' content_summary = (kind, None, None, None) elif self.recursive == 'down': nested_revision_id = self._commit_nested_tree( file_id, path) content_summary = (kind, None, None, nested_revision_id) else: nested_revision_id = self.work_tree.get_reference_revision(file_id) content_summary = (kind, None, None, nested_revision_id) # Record an entry for this item # Note: I don't particularly want to have the existing_ie # parameter but the test suite currently (28-Jun-07) breaks # without it thanks to a unicode normalisation issue. :-( definitely_changed = kind != existing_ie.kind self._record_entry(path, file_id, specific_files, kind, name, parent_id, definitely_changed, existing_ie, report_changes, content_summary) # Unversion IDs that were found to be deleted self.deleted_ids = deleted_ids def _commit_nested_tree(self, file_id, path): "Commit a nested tree." sub_tree = self.work_tree.get_nested_tree(file_id, path) # FIXME: be more comprehensive here: # this works when both trees are in --trees repository, # but when both are bound to a different repository, # it fails; a better way of approaching this is to # finally implement the explicit-caches approach design # a while back - RBC 20070306. if sub_tree.branch.repository.has_same_location( self.work_tree.branch.repository): sub_tree.branch.repository = \ self.work_tree.branch.repository try: return sub_tree.commit(message=None, revprops=self.revprops, recursive=self.recursive, message_callback=self.message_callback, timestamp=self.timestamp, timezone=self.timezone, committer=self.committer, allow_pointless=self.allow_pointless, strict=self.strict, verbose=self.verbose, local=self.local, reporter=self.reporter) except errors.PointlessCommit: return self.work_tree.get_reference_revision(file_id) def _record_entry(self, path, file_id, specific_files, kind, name, parent_id, definitely_changed, existing_ie, report_changes, content_summary): "Record the new inventory entry for a path if any." # mutter('check %s {%s}', path, file_id) # mutter('%s selected for commit', path) if definitely_changed or existing_ie is None: ie = make_entry(kind, name, parent_id, file_id) else: ie = existing_ie.copy() ie.revision = None # For carried over entries we don't care about the fs hash - the repo # isn't generating a sha, so we're not saving computation time. _, _, fs_hash = self.builder.record_entry_contents( ie, self.parent_invs, path, self.work_tree, content_summary) if report_changes: self._report_change(ie, path) if fs_hash: self.work_tree._observed_sha1(ie.file_id, path, fs_hash) return ie def _report_change(self, ie, path): """Report a change to the user. 
The change that has occurred is described relative to the basis inventory. """ if (self.basis_inv.has_id(ie.file_id)): basis_ie = self.basis_inv[ie.file_id] else: basis_ie = None change = ie.describe_change(basis_ie, ie) if change in (InventoryEntry.RENAMED, InventoryEntry.MODIFIED_AND_RENAMED): old_path = self.basis_inv.id2path(ie.file_id) self.reporter.renamed(change, old_path, path) self._next_progress_entry() else: if change == gettext('unchanged'): return self.reporter.snapshot_change(change, path) self._next_progress_entry() def _set_progress_stage(self, name, counter=False): """Set the progress stage and emit an update to the progress bar.""" self.pb_stage_name = name self.pb_stage_count += 1 if counter: self.pb_entries_count = 0 else: self.pb_entries_count = None self._emit_progress() def _next_progress_entry(self): """Emit an update to the progress bar and increment the entry count.""" self.pb_entries_count += 1 self._emit_progress() def _emit_progress(self): if self.pb_entries_count is not None: text = gettext("{0} [{1}] - Stage").format(self.pb_stage_name, self.pb_entries_count) else: text = gettext("%s - Stage") % (self.pb_stage_name, ) self.pb.update(text, self.pb_stage_count, self.pb_stage_total) def _set_specific_file_ids(self): """populate self.specific_file_ids if we will use it.""" if not self.use_record_iter_changes: # If provided, ensure the specified files are versioned if self.specific_files is not None: # Note: This routine is being called because it raises # PathNotVersionedError as a side effect of finding the IDs. We # later use the ids we found as input to the working tree # inventory iterator, so we only consider those ids rather than # examining the whole tree again. # XXX: Dont we have filter_unversioned to do this more # cheaply? self.specific_file_ids = tree.find_ids_across_trees( self.specific_files, [self.basis_tree, self.work_tree]) else: self.specific_file_ids = None bzr-2.7.0/bzrlib/commit_signature_commands.py0000644000000000000000000001645612323427302017545 0ustar 00000000000000# Copyright (C) 2006, 2007, 2009, 2010, 2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Command which looks for unsigned commits by the current user, and signs them. """ from __future__ import absolute_import from bzrlib import ( controldir, errors, gpg, revision as _mod_revision, ) from bzrlib.commands import Command from bzrlib.option import Option from bzrlib.i18n import gettext, ngettext class cmd_sign_my_commits(Command): __doc__ = """Sign all commits by a given committer. If location is not specified the local tree is used. If committer is not specified the default committer is used. This does not sign commits that already have signatures. 
""" # Note that this signs everything on the branch's ancestry # (both mainline and merged), but not other revisions that may be in the # repository takes_options = [ Option('dry-run', help='Don\'t actually sign anything, just print' ' the revisions that would be signed.'), ] takes_args = ['location?', 'committer?'] def run(self, location=None, committer=None, dry_run=False): if location is None: bzrdir = controldir.ControlDir.open_containing('.')[0] else: # Passed in locations should be exact bzrdir = controldir.ControlDir.open(location) branch = bzrdir.open_branch() repo = branch.repository branch_config = branch.get_config_stack() if committer is None: committer = branch_config.get('email') gpg_strategy = gpg.GPGStrategy(branch_config) count = 0 repo.lock_write() try: graph = repo.get_graph() repo.start_write_group() try: for rev_id, parents in graph.iter_ancestry( [branch.last_revision()]): if _mod_revision.is_null(rev_id): continue if parents is None: # Ignore ghosts continue if repo.has_signature_for_revision_id(rev_id): continue rev = repo.get_revision(rev_id) if rev.committer != committer: continue # We have a revision without a signature who has a # matching committer, start signing self.outf.write("%s\n" % rev_id) count += 1 if not dry_run: repo.sign_revision(rev_id, gpg_strategy) except: repo.abort_write_group() raise else: repo.commit_write_group() finally: repo.unlock() self.outf.write( ngettext('Signed %d revision.\n', 'Signed %d revisions.\n', count) % count) class cmd_verify_signatures(Command): __doc__ = """Verify all commit signatures. Verifies that all commits in the branch are signed by known GnuPG keys. """ takes_options = [ Option('acceptable-keys', help='Comma separated list of GPG key patterns which are' ' acceptable for verification.', short_name='k', type=str,), 'revision', 'verbose', ] takes_args = ['location?'] def run(self, acceptable_keys=None, revision=None, verbose=None, location=u'.'): bzrdir = controldir.ControlDir.open_containing(location)[0] branch = bzrdir.open_branch() repo = branch.repository branch_config = branch.get_config_stack() gpg_strategy = gpg.GPGStrategy(branch_config) gpg_strategy.set_acceptable_keys(acceptable_keys) def write(string): self.outf.write(string + "\n") def write_verbose(string): self.outf.write(" " + string + "\n") self.add_cleanup(repo.lock_read().unlock) #get our list of revisions revisions = [] if revision is not None: if len(revision) == 1: revno, rev_id = revision[0].in_history(branch) revisions.append(rev_id) elif len(revision) == 2: from_revno, from_revid = revision[0].in_history(branch) to_revno, to_revid = revision[1].in_history(branch) if to_revid is None: to_revno = branch.revno() if from_revno is None or to_revno is None: raise errors.BzrCommandError(gettext( 'Cannot verify a range of non-revision-history revisions')) for revno in range(from_revno, to_revno + 1): revisions.append(branch.get_rev_id(revno)) else: #all revisions by default including merges graph = repo.get_graph() revisions = [] for rev_id, parents in graph.iter_ancestry( [branch.last_revision()]): if _mod_revision.is_null(rev_id): continue if parents is None: # Ignore ghosts continue revisions.append(rev_id) count, result, all_verifiable = gpg.bulk_verify_signatures( repo, revisions, gpg_strategy) if all_verifiable: write(gettext("All commits signed with verifiable keys")) if verbose: for message in gpg.verbose_valid_message(result): write_verbose(message) return 0 else: write(gpg.valid_commits_message(count)) if verbose: for message in 
gpg.verbose_valid_message(result): write_verbose(message) write(gpg.expired_commit_message(count)) if verbose: for message in gpg.verbose_expired_key_message(result, repo): write_verbose(message) write(gpg.unknown_key_message(count)) if verbose: for message in gpg.verbose_missing_key_message(result): write_verbose(message) write(gpg.commit_not_valid_message(count)) if verbose: for message in gpg.verbose_not_valid_message(result, repo): write_verbose(message) write(gpg.commit_not_signed_message(count)) if verbose: for message in gpg.verbose_not_signed_message(result, repo): write_verbose(message) return 1 bzr-2.7.0/bzrlib/config.py0000644000000000000000000046466712650142037013575 0ustar 00000000000000# Copyright (C) 2005-2014, 2016 Canonical Ltd # Authors: Robert Collins # and others # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Configuration that affects the behaviour of Bazaar. Currently this configuration resides in ~/.bazaar/bazaar.conf and ~/.bazaar/locations.conf, which is written to by bzr. In bazaar.conf the following options may be set: [DEFAULT] editor=name-of-program email=Your Name check_signatures=require|ignore|check-available(default) create_signatures=always|never|when-required(default) gpg_signing_command=name-of-program log_format=name-of-format validate_signatures_in_log=true|false(default) acceptable_keys=pattern1,pattern2 gpg_signing_key=amy@example.com in locations.conf, you specify the url of a branch and options for it. Wildcards may be used - * and ? as normal in shell completion. Options set in both bazaar.conf and locations.conf are overridden by the locations.conf setting. [/home/robertc/source] recurse=False|True(default) email= as above check_signatures= as above create_signatures= as above. validate_signatures_in_log=as above acceptable_keys=as above explanation of options ---------------------- editor - this option sets the pop up editor to use during commits. email - this option sets the user id bzr will use when committing. check_signatures - this option will control whether bzr will require good gpg signatures, ignore them, or check them if they are present. Currently it is unused except that check_signatures turns on create_signatures. create_signatures - this option controls whether bzr will always create gpg signatures or not on commits. There is an unused option which in future is expected to work if branch settings require signatures. log_format - this option sets the default log format. Possible values are long, short, line, or a plugin can register new formats. 
validate_signatures_in_log - show GPG signature validity in log output acceptable_keys - comma separated list of key patterns acceptable for verify-signatures command In bazaar.conf you can also define aliases in the ALIASES sections, example [ALIASES] lastlog=log --line -r-10..-1 ll=log --line -r-10..-1 h=help up=pull """ from __future__ import absolute_import from cStringIO import StringIO import os import sys import bzrlib from bzrlib.decorators import needs_write_lock from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ import base64 import fnmatch import re from bzrlib import ( atomicfile, controldir, debug, directory_service, errors, lazy_regex, library_state, lockdir, mergetools, osutils, symbol_versioning, trace, transport, ui, urlutils, win32utils, ) from bzrlib.i18n import gettext from bzrlib.util.configobj import configobj """) from bzrlib import ( commands, hooks, lazy_regex, registry, ) from bzrlib.symbol_versioning import ( deprecated_in, deprecated_method, ) CHECK_IF_POSSIBLE=0 CHECK_ALWAYS=1 CHECK_NEVER=2 SIGN_WHEN_REQUIRED=0 SIGN_ALWAYS=1 SIGN_NEVER=2 POLICY_NONE = 0 POLICY_NORECURSE = 1 POLICY_APPENDPATH = 2 _policy_name = { POLICY_NONE: None, POLICY_NORECURSE: 'norecurse', POLICY_APPENDPATH: 'appendpath', } _policy_value = { None: POLICY_NONE, 'none': POLICY_NONE, 'norecurse': POLICY_NORECURSE, 'appendpath': POLICY_APPENDPATH, } STORE_LOCATION = POLICY_NONE STORE_LOCATION_NORECURSE = POLICY_NORECURSE STORE_LOCATION_APPENDPATH = POLICY_APPENDPATH STORE_BRANCH = 3 STORE_GLOBAL = 4 def signature_policy_from_unicode(signature_string): """Convert a string to a signing policy.""" if signature_string.lower() == 'check-available': return CHECK_IF_POSSIBLE if signature_string.lower() == 'ignore': return CHECK_NEVER if signature_string.lower() == 'require': return CHECK_ALWAYS raise ValueError("Invalid signatures policy '%s'" % signature_string) def signing_policy_from_unicode(signature_string): """Convert a string to a signing policy.""" if signature_string.lower() == 'when-required': return SIGN_WHEN_REQUIRED if signature_string.lower() == 'never': return SIGN_NEVER if signature_string.lower() == 'always': return SIGN_ALWAYS raise ValueError("Invalid signing policy '%s'" % signature_string) class ConfigObj(configobj.ConfigObj): def __init__(self, infile=None, **kwargs): # We define our own interpolation mechanism calling it option expansion super(ConfigObj, self).__init__(infile=infile, interpolation=False, **kwargs) def get_bool(self, section, key): return self[section].as_bool(key) def get_value(self, section, name): # Try [] for the old DEFAULT section. if section == "DEFAULT": try: return self[name] except KeyError: pass return self[section][name] class Config(object): """A configuration policy - what username, editor, gpg needs etc.""" def __init__(self): super(Config, self).__init__() def config_id(self): """Returns a unique ID for the config.""" raise NotImplementedError(self.config_id) def get_change_editor(self, old_tree, new_tree): from bzrlib import diff cmd = self._get_change_editor() if cmd is None: return None return diff.DiffFromTool.from_string(cmd, old_tree, new_tree, sys.stdout) def _get_signature_checking(self): """Template method to override signature checking policy.""" def _get_signing_policy(self): """Template method to override signature creation policy.""" option_ref_re = None def expand_options(self, string, env=None): """Expand option references in the string in the configuration context. 
:param string: The string containing option to expand. :param env: An option dict defining additional configuration options or overriding existing ones. :returns: The expanded string. """ return self._expand_options_in_string(string, env) def _expand_options_in_list(self, slist, env=None, _ref_stack=None): """Expand options in a list of strings in the configuration context. :param slist: A list of strings. :param env: An option dict defining additional configuration options or overriding existing ones. :param _ref_stack: Private list containing the options being expanded to detect loops. :returns: The flatten list of expanded strings. """ # expand options in each value separately flattening lists result = [] for s in slist: value = self._expand_options_in_string(s, env, _ref_stack) if isinstance(value, list): result.extend(value) else: result.append(value) return result def _expand_options_in_string(self, string, env=None, _ref_stack=None): """Expand options in the string in the configuration context. :param string: The string to be expanded. :param env: An option dict defining additional configuration options or overriding existing ones. :param _ref_stack: Private list containing the options being expanded to detect loops. :returns: The expanded string. """ if string is None: # Not much to expand there return None if _ref_stack is None: # What references are currently resolved (to detect loops) _ref_stack = [] if self.option_ref_re is None: # We want to match the most embedded reference first (i.e. for # '{{foo}}' we will get '{foo}', # for '{bar{baz}}' we will get '{baz}' self.option_ref_re = re.compile('({[^{}]+})') result = string # We need to iterate until no more refs appear ({{foo}} will need two # iterations for example). while True: raw_chunks = self.option_ref_re.split(result) if len(raw_chunks) == 1: # Shorcut the trivial case: no refs return result chunks = [] list_value = False # Split will isolate refs so that every other chunk is a ref chunk_is_ref = False for chunk in raw_chunks: if not chunk_is_ref: if chunk: # Keep only non-empty strings (or we get bogus empty # slots when a list value is involved). chunks.append(chunk) chunk_is_ref = True else: name = chunk[1:-1] if name in _ref_stack: raise errors.OptionExpansionLoop(string, _ref_stack) _ref_stack.append(name) value = self._expand_option(name, env, _ref_stack) if value is None: raise errors.ExpandingUnknownOption(name, string) if isinstance(value, list): list_value = True chunks.extend(value) else: chunks.append(value) _ref_stack.pop() chunk_is_ref = False if list_value: # Once a list appears as the result of an expansion, all # callers will get a list result. This allows a consistent # behavior even when some options in the expansion chain # defined as strings (no comma in their value) but their # expanded value is a list. 
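                # (e.g. with an option defined as 'opt = x, y', expanding
                # '{opt}' yields the list ['x', 'y'] for every caller)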
return self._expand_options_in_list(chunks, env, _ref_stack) else: result = ''.join(chunks) return result def _expand_option(self, name, env, _ref_stack): if env is not None and name in env: # Special case, values provided in env takes precedence over # anything else value = env[name] else: # FIXME: This is a limited implementation, what we really need is a # way to query the bzr config for the value of an option, # respecting the scope rules (That is, once we implement fallback # configs, getting the option value should restart from the top # config, not the current one) -- vila 20101222 value = self.get_user_option(name, expand=False) if isinstance(value, list): value = self._expand_options_in_list(value, env, _ref_stack) else: value = self._expand_options_in_string(value, env, _ref_stack) return value def _get_user_option(self, option_name): """Template method to provide a user option.""" return None def get_user_option(self, option_name, expand=True): """Get a generic option - no special process, no default. :param option_name: The queried option. :param expand: Whether options references should be expanded. :returns: The value of the option. """ value = self._get_user_option(option_name) if expand: if isinstance(value, list): value = self._expand_options_in_list(value) elif isinstance(value, dict): trace.warning('Cannot expand "%s":' ' Dicts do not support option expansion' % (option_name,)) else: value = self._expand_options_in_string(value) for hook in OldConfigHooks['get']: hook(self, option_name, value) return value def get_user_option_as_bool(self, option_name, expand=None, default=None): """Get a generic option as a boolean. :param expand: Allow expanding references to other config values. :param default: Default value if nothing is configured :return None if the option doesn't exist or its value can't be interpreted as a boolean. Returns True or False otherwise. """ s = self.get_user_option(option_name, expand=expand) if s is None: # The option doesn't exist return default val = ui.bool_from_string(s) if val is None: # The value can't be interpreted as a boolean trace.warning('Value "%s" is not a boolean for "%s"', s, option_name) return val def get_user_option_as_list(self, option_name, expand=None): """Get a generic option as a list - no special process, no default. :return None if the option doesn't exist. Returns the value as a list otherwise. """ l = self.get_user_option(option_name, expand=expand) if isinstance(l, (str, unicode)): # A single value, most probably the user forgot (or didn't care to # add) the final ',' l = [l] return l @deprecated_method(deprecated_in((2, 5, 0))) def get_user_option_as_int_from_SI(self, option_name, default=None): """Get a generic option from a human readable size in SI units, e.g 10MB Accepted suffixes are K,M,G. It is case-insensitive and may be followed by a trailing b (i.e. Kb, MB). This is intended to be practical and not pedantic. :return Integer, expanded to its base-10 value if a proper SI unit is found. 
If the option doesn't exist, or isn't a value in SI units, return default (which defaults to None) """ val = self.get_user_option(option_name) if isinstance(val, list): val = val[0] if val is None: val = default else: p = re.compile("^(\d+)([kmg])*b*$", re.IGNORECASE) try: m = p.match(val) if m is not None: val = int(m.group(1)) if m.group(2) is not None: if m.group(2).lower() == 'k': val *= 10**3 elif m.group(2).lower() == 'm': val *= 10**6 elif m.group(2).lower() == 'g': val *= 10**9 else: ui.ui_factory.show_warning(gettext('Invalid config value for "{0}" ' ' value {1!r} is not an SI unit.').format( option_name, val)) val = default except TypeError: val = default return val @deprecated_method(deprecated_in((2, 5, 0))) def gpg_signing_command(self): """What program should be used to sign signatures?""" result = self._gpg_signing_command() if result is None: result = "gpg" return result def _gpg_signing_command(self): """See gpg_signing_command().""" return None @deprecated_method(deprecated_in((2, 5, 0))) def log_format(self): """What log format should be used""" result = self._log_format() if result is None: result = "long" return result def _log_format(self): """See log_format().""" return None def validate_signatures_in_log(self): """Show GPG signature validity in log""" result = self._validate_signatures_in_log() if result == "true": result = True else: result = False return result def _validate_signatures_in_log(self): """See validate_signatures_in_log().""" return None @deprecated_method(deprecated_in((2, 5, 0))) def acceptable_keys(self): """Comma separated list of key patterns acceptable to verify-signatures command""" result = self._acceptable_keys() return result def _acceptable_keys(self): """See acceptable_keys().""" return None @deprecated_method(deprecated_in((2, 5, 0))) def post_commit(self): """An ordered list of python functions to call. Each function takes branch, rev_id as parameters. """ return self._post_commit() def _post_commit(self): """See Config.post_commit.""" return None def user_email(self): """Return just the email component of a username.""" return extract_email_address(self.username()) def username(self): """Return email-style username. Something similar to 'Martin Pool ' $BZR_EMAIL can be set to override this, then the concrete policy type is checked, and finally $EMAIL is examined. If no username can be found, errors.NoWhoami exception is raised. """ v = os.environ.get('BZR_EMAIL') if v: return v.decode(osutils.get_user_encoding()) v = self._get_user_id() if v: return v return default_email() def ensure_username(self): """Raise errors.NoWhoami if username is not set. This method relies on the username() function raising the error. 
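
        A minimal sketch of typical use (any Config instance will do;
        GlobalConfig is used here purely as an illustration)::

            config = GlobalConfig()
            config.ensure_username()  # raises errors.NoWhoami if no id is set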
""" self.username() @deprecated_method(deprecated_in((2, 5, 0))) def signature_checking(self): """What is the current policy for signature checking?.""" policy = self._get_signature_checking() if policy is not None: return policy return CHECK_IF_POSSIBLE @deprecated_method(deprecated_in((2, 5, 0))) def signing_policy(self): """What is the current policy for signature checking?.""" policy = self._get_signing_policy() if policy is not None: return policy return SIGN_WHEN_REQUIRED @deprecated_method(deprecated_in((2, 5, 0))) def signature_needed(self): """Is a signature needed when committing ?.""" policy = self._get_signing_policy() if policy is None: policy = self._get_signature_checking() if policy is not None: #this warning should go away once check_signatures is #implemented (if not before) trace.warning("Please use create_signatures," " not check_signatures to set signing policy.") elif policy == SIGN_ALWAYS: return True return False @deprecated_method(deprecated_in((2, 5, 0))) def gpg_signing_key(self): """GPG user-id to sign commits""" key = self.get_user_option('gpg_signing_key') if key == "default" or key == None: return self.user_email() else: return key def get_alias(self, value): return self._get_alias(value) def _get_alias(self, value): pass def get_nickname(self): return self._get_nickname() def _get_nickname(self): return None def get_bzr_remote_path(self): try: return os.environ['BZR_REMOTE_PATH'] except KeyError: path = self.get_user_option("bzr_remote_path") if path is None: path = 'bzr' return path def suppress_warning(self, warning): """Should the warning be suppressed or emitted. :param warning: The name of the warning being tested. :returns: True if the warning should be suppressed, False otherwise. """ warnings = self.get_user_option_as_list('suppress_warnings') if warnings is None or warning not in warnings: return False else: return True def get_merge_tools(self): tools = {} for (oname, value, section, conf_id, parser) in self._get_options(): if oname.startswith('bzr.mergetool.'): tool_name = oname[len('bzr.mergetool.'):] tools[tool_name] = self.get_user_option(oname, False) trace.mutter('loaded merge tools: %r' % tools) return tools def find_merge_tool(self, name): # We fake a defaults mechanism here by checking if the given name can # be found in the known_merge_tools if it's not found in the config. # This should be done through the proposed config defaults mechanism # when it becomes available in the future. command_line = (self.get_user_option('bzr.mergetool.%s' % name, expand=False) or mergetools.known_merge_tools.get(name, None)) return command_line class _ConfigHooks(hooks.Hooks): """A dict mapping hook names and a list of callables for configs. """ def __init__(self): """Create the default hooks. These are all empty initially, because by default nothing should get notified. """ super(_ConfigHooks, self).__init__('bzrlib.config', 'ConfigHooks') self.add_hook('load', 'Invoked when a config store is loaded.' ' The signature is (store).', (2, 4)) self.add_hook('save', 'Invoked when a config store is saved.' ' The signature is (store).', (2, 4)) # The hooks for config options self.add_hook('get', 'Invoked when a config option is read.' ' The signature is (stack, name, value).', (2, 4)) self.add_hook('set', 'Invoked when a config option is set.' ' The signature is (stack, name, value).', (2, 4)) self.add_hook('remove', 'Invoked when a config option is removed.' 
' The signature is (stack, name).', (2, 4)) ConfigHooks = _ConfigHooks() class _OldConfigHooks(hooks.Hooks): """A dict mapping hook names and a list of callables for configs. """ def __init__(self): """Create the default hooks. These are all empty initially, because by default nothing should get notified. """ super(_OldConfigHooks, self).__init__('bzrlib.config', 'OldConfigHooks') self.add_hook('load', 'Invoked when a config store is loaded.' ' The signature is (config).', (2, 4)) self.add_hook('save', 'Invoked when a config store is saved.' ' The signature is (config).', (2, 4)) # The hooks for config options self.add_hook('get', 'Invoked when a config option is read.' ' The signature is (config, name, value).', (2, 4)) self.add_hook('set', 'Invoked when a config option is set.' ' The signature is (config, name, value).', (2, 4)) self.add_hook('remove', 'Invoked when a config option is removed.' ' The signature is (config, name).', (2, 4)) OldConfigHooks = _OldConfigHooks() class IniBasedConfig(Config): """A configuration policy that draws from ini files.""" def __init__(self, get_filename=symbol_versioning.DEPRECATED_PARAMETER, file_name=None): """Base class for configuration files using an ini-like syntax. :param file_name: The configuration file path. """ super(IniBasedConfig, self).__init__() self.file_name = file_name if symbol_versioning.deprecated_passed(get_filename): symbol_versioning.warn( 'IniBasedConfig.__init__(get_filename) was deprecated in 2.3.' ' Use file_name instead.', DeprecationWarning, stacklevel=2) if get_filename is not None: self.file_name = get_filename() else: self.file_name = file_name self._content = None self._parser = None @classmethod def from_string(cls, str_or_unicode, file_name=None, save=False): """Create a config object from a string. :param str_or_unicode: A string representing the file content. This will be utf-8 encoded. :param file_name: The configuration file path. :param _save: Whether the file should be saved upon creation. """ conf = cls(file_name=file_name) conf._create_from_string(str_or_unicode, save) return conf def _create_from_string(self, str_or_unicode, save): self._content = StringIO(str_or_unicode.encode('utf-8')) # Some tests use in-memory configs, some other always need the config # file to exist on disk. if save: self._write_config_file() def _get_parser(self, file=symbol_versioning.DEPRECATED_PARAMETER): if self._parser is not None: return self._parser if symbol_versioning.deprecated_passed(file): symbol_versioning.warn( 'IniBasedConfig._get_parser(file=xxx) was deprecated in 2.3.' 
' Use IniBasedConfig(_content=xxx) instead.', DeprecationWarning, stacklevel=2) if self._content is not None: co_input = self._content elif self.file_name is None: raise AssertionError('We have no content to create the config') else: co_input = self.file_name try: self._parser = ConfigObj(co_input, encoding='utf-8') except configobj.ConfigObjError, e: raise errors.ParseConfigError(e.errors, e.config.filename) except UnicodeDecodeError: raise errors.ConfigContentError(self.file_name) # Make sure self.reload() will use the right file name self._parser.filename = self.file_name for hook in OldConfigHooks['load']: hook(self) return self._parser def reload(self): """Reload the config file from disk.""" if self.file_name is None: raise AssertionError('We need a file name to reload the config') if self._parser is not None: self._parser.reload() for hook in ConfigHooks['load']: hook(self) def _get_matching_sections(self): """Return an ordered list of (section_name, extra_path) pairs. If the section contains inherited configuration, extra_path is a string containing the additional path components. """ section = self._get_section() if section is not None: return [(section, '')] else: return [] def _get_section(self): """Override this to define the section used by the config.""" return "DEFAULT" def _get_sections(self, name=None): """Returns an iterator of the sections specified by ``name``. :param name: The section name. If None is supplied, the default configurations are yielded. :return: A tuple (name, section, config_id) for all sections that will be walked by user_get_option() in the 'right' order. The first one is where set_user_option() will update the value. """ parser = self._get_parser() if name is not None: yield (name, parser[name], self.config_id()) else: # No section name has been given so we fallback to the configobj # itself which holds the variables defined outside of any section. yield (None, parser, self.config_id()) def _get_options(self, sections=None): """Return an ordered list of (name, value, section, config_id) tuples. All options are returned with their associated value and the section they appeared in. ``config_id`` is a unique identifier for the configuration file the option is defined in. :param sections: Default to ``_get_matching_sections`` if not specified. This gives a better control to daughter classes about which sections should be searched. This is a list of (name, configobj) tuples. """ opts = [] if sections is None: parser = self._get_parser() sections = [] for (section_name, _) in self._get_matching_sections(): try: section = parser[section_name] except KeyError: # This could happen for an empty file for which we define a # DEFAULT section. FIXME: Force callers to provide sections # instead ? 
-- vila 20100930 continue sections.append((section_name, section)) config_id = self.config_id() for (section_name, section) in sections: for (name, value) in section.iteritems(): yield (name, parser._quote(value), section_name, config_id, parser) def _get_option_policy(self, section, option_name): """Return the policy for the given (section, option_name) pair.""" return POLICY_NONE def _get_change_editor(self): return self.get_user_option('change_editor') def _get_signature_checking(self): """See Config._get_signature_checking.""" policy = self._get_user_option('check_signatures') if policy: return signature_policy_from_unicode(policy) def _get_signing_policy(self): """See Config._get_signing_policy""" policy = self._get_user_option('create_signatures') if policy: return signing_policy_from_unicode(policy) def _get_user_id(self): """Get the user id from the 'email' key in the current section.""" return self._get_user_option('email') def _get_user_option(self, option_name): """See Config._get_user_option.""" for (section, extra_path) in self._get_matching_sections(): try: value = self._get_parser().get_value(section, option_name) except KeyError: continue policy = self._get_option_policy(section, option_name) if policy == POLICY_NONE: return value elif policy == POLICY_NORECURSE: # norecurse items only apply to the exact path if extra_path: continue else: return value elif policy == POLICY_APPENDPATH: if extra_path: value = urlutils.join(value, extra_path) return value else: raise AssertionError('Unexpected config policy %r' % policy) else: return None def _gpg_signing_command(self): """See Config.gpg_signing_command.""" return self._get_user_option('gpg_signing_command') def _log_format(self): """See Config.log_format.""" return self._get_user_option('log_format') def _validate_signatures_in_log(self): """See Config.validate_signatures_in_log.""" return self._get_user_option('validate_signatures_in_log') def _acceptable_keys(self): """See Config.acceptable_keys.""" return self._get_user_option('acceptable_keys') def _post_commit(self): """See Config.post_commit.""" return self._get_user_option('post_commit') def _get_alias(self, value): try: return self._get_parser().get_value("ALIASES", value) except KeyError: pass def _get_nickname(self): return self.get_user_option('nickname') def remove_user_option(self, option_name, section_name=None): """Remove a user option and save the configuration file. :param option_name: The option to be removed. :param section_name: The section the option is defined in, default to the default section. """ self.reload() parser = self._get_parser() if section_name is None: section = parser else: section = parser[section_name] try: del section[option_name] except KeyError: raise errors.NoSuchConfigOption(option_name) self._write_config_file() for hook in OldConfigHooks['remove']: hook(self, option_name) def _write_config_file(self): if self.file_name is None: raise AssertionError('We cannot save, self.file_name is None') conf_dir = os.path.dirname(self.file_name) ensure_config_dir_exists(conf_dir) atomic_file = atomicfile.AtomicFile(self.file_name) self._get_parser().write(atomic_file) atomic_file.commit() atomic_file.close() osutils.copy_ownership_from_path(self.file_name) for hook in OldConfigHooks['save']: hook(self) class LockableConfig(IniBasedConfig): """A configuration needing explicit locking for access. If several processes try to write the config file, the accesses need to be serialized. 
Daughter classes should decorate all methods that update a config with the ``@needs_write_lock`` decorator (they call, directly or indirectly, the ``_write_config_file()`` method. These methods (typically ``set_option()`` and variants must reload the config file from disk before calling ``_write_config_file()``), this can be achieved by calling the ``self.reload()`` method. Note that the lock scope should cover both the reading and the writing of the config file which is why the decorator can't be applied to ``_write_config_file()`` only. This should be enough to implement the following logic: - lock for exclusive write access, - reload the config file from disk, - set the new value - unlock This logic guarantees that a writer can update a value without erasing an update made by another writer. """ lock_name = 'lock' def __init__(self, file_name): super(LockableConfig, self).__init__(file_name=file_name) self.dir = osutils.dirname(osutils.safe_unicode(self.file_name)) # FIXME: It doesn't matter that we don't provide possible_transports # below since this is currently used only for local config files ; # local transports are not shared. But if/when we start using # LockableConfig for other kind of transports, we will need to reuse # whatever connection is already established -- vila 20100929 self.transport = transport.get_transport_from_path(self.dir) self._lock = lockdir.LockDir(self.transport, self.lock_name) def _create_from_string(self, unicode_bytes, save): super(LockableConfig, self)._create_from_string(unicode_bytes, False) if save: # We need to handle the saving here (as opposed to IniBasedConfig) # to be able to lock self.lock_write() self._write_config_file() self.unlock() def lock_write(self, token=None): """Takes a write lock in the directory containing the config file. If the directory doesn't exist it is created. """ ensure_config_dir_exists(self.dir) return self._lock.lock_write(token) def unlock(self): self._lock.unlock() def break_lock(self): self._lock.break_lock() @needs_write_lock def remove_user_option(self, option_name, section_name=None): super(LockableConfig, self).remove_user_option(option_name, section_name) def _write_config_file(self): if self._lock is None or not self._lock.is_held: # NB: if the following exception is raised it probably means a # missing @needs_write_lock decorator on one of the callers. raise errors.ObjectNotLocked(self) super(LockableConfig, self)._write_config_file() class GlobalConfig(LockableConfig): """The configuration that should be used for a specific location.""" def __init__(self): super(GlobalConfig, self).__init__(file_name=config_filename()) def config_id(self): return 'bazaar' @classmethod def from_string(cls, str_or_unicode, save=False): """Create a config object from a string. :param str_or_unicode: A string representing the file content. This will be utf-8 encoded. :param save: Whether the file should be saved upon creation. 
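
        A minimal illustrative use (the value shown is hypothetical; with
        the default save=False nothing is written to disk)::

            conf = GlobalConfig.from_string(u'email = Jane <jane@example.com>')
            conf.get_user_option('email')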
""" conf = cls() conf._create_from_string(str_or_unicode, save) return conf @needs_write_lock def set_user_option(self, option, value): """Save option and its value in the configuration.""" self._set_option(option, value, 'DEFAULT') def get_aliases(self): """Return the aliases section.""" if 'ALIASES' in self._get_parser(): return self._get_parser()['ALIASES'] else: return {} @needs_write_lock def set_alias(self, alias_name, alias_command): """Save the alias in the configuration.""" self._set_option(alias_name, alias_command, 'ALIASES') @needs_write_lock def unset_alias(self, alias_name): """Unset an existing alias.""" self.reload() aliases = self._get_parser().get('ALIASES') if not aliases or alias_name not in aliases: raise errors.NoSuchAlias(alias_name) del aliases[alias_name] self._write_config_file() def _set_option(self, option, value, section): self.reload() self._get_parser().setdefault(section, {})[option] = value self._write_config_file() for hook in OldConfigHooks['set']: hook(self, option, value) def _get_sections(self, name=None): """See IniBasedConfig._get_sections().""" parser = self._get_parser() # We don't give access to options defined outside of any section, we # used the DEFAULT section by... default. if name in (None, 'DEFAULT'): # This could happen for an empty file where the DEFAULT section # doesn't exist yet. So we force DEFAULT when yielding name = 'DEFAULT' if 'DEFAULT' not in parser: parser['DEFAULT']= {} yield (name, parser[name], self.config_id()) @needs_write_lock def remove_user_option(self, option_name, section_name=None): if section_name is None: # We need to force the default section. section_name = 'DEFAULT' # We need to avoid the LockableConfig implementation or we'll lock # twice super(LockableConfig, self).remove_user_option(option_name, section_name) def _iter_for_location_by_parts(sections, location): """Keep only the sessions matching the specified location. :param sections: An iterable of section names. :param location: An url or a local path to match against. :returns: An iterator of (section, extra_path, nb_parts) where nb is the number of path components in the section name, section is the section name and extra_path is the difference between location and the section name. ``location`` will always be a local path and never a 'file://' url but the section names themselves can be in either form. """ location_parts = location.rstrip('/').split('/') for section in sections: # location is a local path if possible, so we need to convert 'file://' # urls in section names to local paths if necessary. # This also avoids having file:///path be a more exact # match than '/path'. # FIXME: This still raises an issue if a user defines both file:///path # *and* /path. Should we raise an error in this case -- vila 20110505 if section.startswith('file://'): section_path = urlutils.local_path_from_url(section) else: section_path = section section_parts = section_path.rstrip('/').split('/') matched = True if len(section_parts) > len(location_parts): # More path components in the section, they can't match matched = False else: # Rely on zip truncating in length to the length of the shortest # argument sequence. 
names = zip(location_parts, section_parts) for name in names: if not fnmatch.fnmatch(name[0], name[1]): matched = False break if not matched: continue # build the path difference between the section and the location extra_path = '/'.join(location_parts[len(section_parts):]) yield section, extra_path, len(section_parts) class LocationConfig(LockableConfig): """A configuration object that gives the policy for a location.""" def __init__(self, location): super(LocationConfig, self).__init__( file_name=locations_config_filename()) # local file locations are looked up by local path, rather than # by file url. This is because the config file is a user # file, and we would rather not expose the user to file urls. if location.startswith('file://'): location = urlutils.local_path_from_url(location) self.location = location def config_id(self): return 'locations' @classmethod def from_string(cls, str_or_unicode, location, save=False): """Create a config object from a string. :param str_or_unicode: A string representing the file content. This will be utf-8 encoded. :param location: The location url to filter the configuration. :param save: Whether the file should be saved upon creation. """ conf = cls(location) conf._create_from_string(str_or_unicode, save) return conf def _get_matching_sections(self): """Return an ordered list of section names matching this location.""" matches = list(_iter_for_location_by_parts(self._get_parser(), self.location)) # put the longest (aka more specific) locations first matches.sort( key=lambda (section, extra_path, length): (length, section), reverse=True) for (section, extra_path, length) in matches: yield section, extra_path # should we stop looking for parent configs here? try: if self._get_parser()[section].as_bool('ignore_parents'): break except KeyError: pass def _get_sections(self, name=None): """See IniBasedConfig._get_sections().""" # We ignore the name here as the only sections handled are named with # the location path and we don't expose embedded sections either. parser = self._get_parser() for name, extra_path in self._get_matching_sections(): yield (name, parser[name], self.config_id()) def _get_option_policy(self, section, option_name): """Return the policy for the given (section, option_name) pair.""" # check for the old 'recurse=False' flag try: recurse = self._get_parser()[section].as_bool('recurse') except KeyError: recurse = True if not recurse: return POLICY_NORECURSE policy_key = option_name + ':policy' try: policy_name = self._get_parser()[section][policy_key] except KeyError: policy_name = None return _policy_value[policy_name] def _set_option_policy(self, section, option_name, option_policy): """Set the policy for the given option name in the given section.""" # The old recurse=False option affects all options in the # section. To handle multiple policies in the section, we # need to convert it to a policy_norecurse key. try: recurse = self._get_parser()[section].as_bool('recurse') except KeyError: pass else: symbol_versioning.warn( 'The recurse option is deprecated as of 0.14. ' 'The section "%s" has been converted to use policies.' 
% section, DeprecationWarning) del self._get_parser()[section]['recurse'] if not recurse: for key in self._get_parser()[section].keys(): if not key.endswith(':policy'): self._get_parser()[section][key + ':policy'] = 'norecurse' policy_key = option_name + ':policy' policy_name = _policy_name[option_policy] if policy_name is not None: self._get_parser()[section][policy_key] = policy_name else: if policy_key in self._get_parser()[section]: del self._get_parser()[section][policy_key] @needs_write_lock def set_user_option(self, option, value, store=STORE_LOCATION): """Save option and its value in the configuration.""" if store not in [STORE_LOCATION, STORE_LOCATION_NORECURSE, STORE_LOCATION_APPENDPATH]: raise ValueError('bad storage policy %r for %r' % (store, option)) self.reload() location = self.location if location.endswith('/'): location = location[:-1] parser = self._get_parser() if not location in parser and not location + '/' in parser: parser[location] = {} elif location + '/' in parser: location = location + '/' parser[location][option]=value # the allowed values of store match the config policies self._set_option_policy(location, option, store) self._write_config_file() for hook in OldConfigHooks['set']: hook(self, option, value) class BranchConfig(Config): """A configuration object giving the policy for a branch.""" def __init__(self, branch): super(BranchConfig, self).__init__() self._location_config = None self._branch_data_config = None self._global_config = None self.branch = branch self.option_sources = (self._get_location_config, self._get_branch_data_config, self._get_global_config) def config_id(self): return 'branch' def _get_branch_data_config(self): if self._branch_data_config is None: self._branch_data_config = TreeConfig(self.branch) self._branch_data_config.config_id = self.config_id return self._branch_data_config def _get_location_config(self): if self._location_config is None: self._location_config = LocationConfig(self.branch.base) return self._location_config def _get_global_config(self): if self._global_config is None: self._global_config = GlobalConfig() return self._global_config def _get_best_value(self, option_name): """This returns a user option from local, tree or global config. They are tried in that order. Use get_safe_value if trusted values are necessary. """ for source in self.option_sources: value = getattr(source(), option_name)() if value is not None: return value return None def _get_safe_value(self, option_name): """This variant of get_best_value never returns untrusted values. It does not return values from the branch data, because the branch may not be controlled by the user. We may wish to allow locations.conf to control whether branches are trusted in the future. """ for source in (self._get_location_config, self._get_global_config): value = getattr(source(), option_name)() if value is not None: return value return None def _get_user_id(self): """Return the full user id for the branch. e.g. "John Hacker " This is looked up in the email controlfile for the branch. 
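
        The value is taken from the first of locations.conf, branch.conf and
        bazaar.conf that defines one (see _get_best_value).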
""" return self._get_best_value('_get_user_id') def _get_change_editor(self): return self._get_best_value('_get_change_editor') def _get_signature_checking(self): """See Config._get_signature_checking.""" return self._get_best_value('_get_signature_checking') def _get_signing_policy(self): """See Config._get_signing_policy.""" return self._get_best_value('_get_signing_policy') def _get_user_option(self, option_name): """See Config._get_user_option.""" for source in self.option_sources: value = source()._get_user_option(option_name) if value is not None: return value return None def _get_sections(self, name=None): """See IniBasedConfig.get_sections().""" for source in self.option_sources: for section in source()._get_sections(name): yield section def _get_options(self, sections=None): opts = [] # First the locations options for option in self._get_location_config()._get_options(): yield option # Then the branch options branch_config = self._get_branch_data_config() if sections is None: sections = [('DEFAULT', branch_config._get_parser())] # FIXME: We shouldn't have to duplicate the code in IniBasedConfig but # Config itself has no notion of sections :( -- vila 20101001 config_id = self.config_id() for (section_name, section) in sections: for (name, value) in section.iteritems(): yield (name, value, section_name, config_id, branch_config._get_parser()) # Then the global options for option in self._get_global_config()._get_options(): yield option def set_user_option(self, name, value, store=STORE_BRANCH, warn_masked=False): if store == STORE_BRANCH: self._get_branch_data_config().set_option(value, name) elif store == STORE_GLOBAL: self._get_global_config().set_user_option(name, value) else: self._get_location_config().set_user_option(name, value, store) if not warn_masked: return if store in (STORE_GLOBAL, STORE_BRANCH): mask_value = self._get_location_config().get_user_option(name) if mask_value is not None: trace.warning('Value "%s" is masked by "%s" from' ' locations.conf', value, mask_value) else: if store == STORE_GLOBAL: branch_config = self._get_branch_data_config() mask_value = branch_config.get_user_option(name) if mask_value is not None: trace.warning('Value "%s" is masked by "%s" from' ' branch.conf', value, mask_value) def remove_user_option(self, option_name, section_name=None): self._get_branch_data_config().remove_option(option_name, section_name) def _gpg_signing_command(self): """See Config.gpg_signing_command.""" return self._get_safe_value('_gpg_signing_command') def _post_commit(self): """See Config.post_commit.""" return self._get_safe_value('_post_commit') def _get_nickname(self): value = self._get_explicit_nickname() if value is not None: return value if self.branch.name: return self.branch.name return urlutils.unescape(self.branch.base.split('/')[-2]) def has_explicit_nickname(self): """Return true if a nickname has been explicitly assigned.""" return self._get_explicit_nickname() is not None def _get_explicit_nickname(self): return self._get_best_value('_get_nickname') def _log_format(self): """See Config.log_format.""" return self._get_best_value('_log_format') def _validate_signatures_in_log(self): """See Config.validate_signatures_in_log.""" return self._get_best_value('_validate_signatures_in_log') def _acceptable_keys(self): """See Config.acceptable_keys.""" return self._get_best_value('_acceptable_keys') def ensure_config_dir_exists(path=None): """Make sure a configuration directory exists. This makes sure that the directory exists. 
On windows, since configuration directories are 2 levels deep, it makes sure both the directory and the parent directory exists. """ if path is None: path = config_dir() if not os.path.isdir(path): if sys.platform == 'win32': parent_dir = os.path.dirname(path) if not os.path.isdir(parent_dir): trace.mutter('creating config parent directory: %r', parent_dir) os.mkdir(parent_dir) trace.mutter('creating config directory: %r', path) os.mkdir(path) osutils.copy_ownership_from_path(path) def config_dir(): """Return per-user configuration directory as unicode string By default this is %APPDATA%/bazaar/2.0 on Windows, ~/.bazaar on Mac OS X and Linux. On Mac OS X and Linux, if there is a $XDG_CONFIG_HOME/bazaar directory, that will be used instead. TODO: Global option --config-dir to override this. """ base = osutils.path_from_environ('BZR_HOME') if sys.platform == 'win32': if base is None: base = win32utils.get_appdata_location() if base is None: base = win32utils.get_home_location() # GZ 2012-02-01: Really the two level subdirs only make sense inside # APPDATA, but hard to move. See bug 348640 for more. return osutils.pathjoin(base, 'bazaar', '2.0') if base is None: xdg_dir = osutils.path_from_environ('XDG_CONFIG_HOME') if xdg_dir is None: xdg_dir = osutils.pathjoin(osutils._get_home_dir(), ".config") xdg_dir = osutils.pathjoin(xdg_dir, 'bazaar') if osutils.isdir(xdg_dir): trace.mutter( "Using configuration in XDG directory %s." % xdg_dir) return xdg_dir base = osutils._get_home_dir() return osutils.pathjoin(base, ".bazaar") def config_filename(): """Return per-user configuration ini file filename.""" return osutils.pathjoin(config_dir(), 'bazaar.conf') def locations_config_filename(): """Return per-user configuration ini file filename.""" return osutils.pathjoin(config_dir(), 'locations.conf') def authentication_config_filename(): """Return per-user authentication ini file filename.""" return osutils.pathjoin(config_dir(), 'authentication.conf') def user_ignore_config_filename(): """Return the user default ignore filename""" return osutils.pathjoin(config_dir(), 'ignore') def crash_dir(): """Return the directory name to store crash files. This doesn't implicitly create it. On Windows it's in the config directory; elsewhere it's /var/crash which may be monitored by apport. It can be overridden by $APPORT_CRASH_DIR. """ if sys.platform == 'win32': return osutils.pathjoin(config_dir(), 'Crash') else: # XXX: hardcoded in apport_python_hook.py; therefore here too -- mbp # 2010-01-31 return os.environ.get('APPORT_CRASH_DIR', '/var/crash') def xdg_cache_dir(): # See http://standards.freedesktop.org/basedir-spec/latest/ar01s03.html # Possibly this should be different on Windows? e = os.environ.get('XDG_CACHE_HOME', None) if e: return e else: return os.path.expanduser('~/.cache') def _get_default_mail_domain(mailname_file='/etc/mailname'): """If possible, return the assumed default email domain. :returns: string mail domain, or None. 
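
    For example, if /etc/mailname contains the single line ``example.com``,
    this returns 'example.com'; if the file is missing or unreadable, None
    is returned.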
""" if sys.platform == 'win32': # No implementation yet; patches welcome return None try: f = open(mailname_file) except (IOError, OSError), e: return None try: domain = f.readline().strip() return domain finally: f.close() def default_email(): v = os.environ.get('BZR_EMAIL') if v: return v.decode(osutils.get_user_encoding()) v = os.environ.get('EMAIL') if v: return v.decode(osutils.get_user_encoding()) name, email = _auto_user_id() if name and email: return u'%s <%s>' % (name, email) elif email: return email raise errors.NoWhoami() def _auto_user_id(): """Calculate automatic user identification. :returns: (realname, email), either of which may be None if they can't be determined. Only used when none is set in the environment or the id file. This only returns an email address if we can be fairly sure the address is reasonable, ie if /etc/mailname is set on unix. This doesn't use the FQDN as the default domain because that may be slow, and it doesn't use the hostname alone because that's not normally a reasonable address. """ if sys.platform == 'win32': # No implementation to reliably determine Windows default mail # address; please add one. return None, None default_mail_domain = _get_default_mail_domain() if not default_mail_domain: return None, None import pwd uid = os.getuid() try: w = pwd.getpwuid(uid) except KeyError: trace.mutter('no passwd entry for uid %d?' % uid) return None, None # we try utf-8 first, because on many variants (like Linux), # /etc/passwd "should" be in utf-8, and because it's unlikely to give # false positives. (many users will have their user encoding set to # latin-1, which cannot raise UnicodeError.) try: gecos = w.pw_gecos.decode('utf-8') encoding = 'utf-8' except UnicodeError: try: encoding = osutils.get_user_encoding() gecos = w.pw_gecos.decode(encoding) except UnicodeError, e: trace.mutter("cannot decode passwd entry %s" % w) return None, None try: username = w.pw_name.decode(encoding) except UnicodeError, e: trace.mutter("cannot decode passwd entry %s" % w) return None, None comma = gecos.find(',') if comma == -1: realname = gecos else: realname = gecos[:comma] return realname, (username + '@' + default_mail_domain) def parse_username(username): """Parse e-mail username and return a (name, address) tuple.""" match = re.match(r'(.*?)\s*?', username) if match is None: return (username, '') else: return (match.group(1), match.group(2)) def extract_email_address(e): """Return just the address part of an email string. That is just the user@domain part, nothing else. This part is required to contain only ascii characters. If it can't be extracted, raises an error. >>> extract_email_address('Jane Tester ') "jane@test.com" """ name, email = parse_username(e) if not email: raise errors.NoEmailInUsername(e) return email class TreeConfig(IniBasedConfig): """Branch configuration data associated with its contents, not location""" # XXX: Really needs a better name, as this is not part of the tree! 
-- mbp 20080507 def __init__(self, branch): self._config = branch._get_config() self.branch = branch def _get_parser(self, file=None): if file is not None: return IniBasedConfig._get_parser(file) return self._config._get_configobj() def get_option(self, name, section=None, default=None): self.branch.lock_read() try: return self._config.get_option(name, section, default) finally: self.branch.unlock() def set_option(self, value, name, section=None): """Set a per-branch configuration option""" # FIXME: We shouldn't need to lock explicitly here but rather rely on # higher levels providing the right lock -- vila 20101004 self.branch.lock_write() try: self._config.set_option(value, name, section) finally: self.branch.unlock() def remove_option(self, option_name, section_name=None): # FIXME: We shouldn't need to lock explicitly here but rather rely on # higher levels providing the right lock -- vila 20101004 self.branch.lock_write() try: self._config.remove_option(option_name, section_name) finally: self.branch.unlock() class AuthenticationConfig(object): """The authentication configuration file based on a ini file. Implements the authentication.conf file described in doc/developers/authentication-ring.txt. """ def __init__(self, _file=None): self._config = None # The ConfigObj if _file is None: self._filename = authentication_config_filename() self._input = self._filename = authentication_config_filename() else: # Tests can provide a string as _file self._filename = None self._input = _file def _get_config(self): if self._config is not None: return self._config try: # FIXME: Should we validate something here ? Includes: empty # sections are useless, at least one of # user/password/password_encoding should be defined, etc. # Note: the encoding below declares that the file itself is utf-8 # encoded, but the values in the ConfigObj are always Unicode. self._config = ConfigObj(self._input, encoding='utf-8') except configobj.ConfigObjError, e: raise errors.ParseConfigError(e.errors, e.config.filename) except UnicodeError: raise errors.ConfigContentError(self._filename) return self._config def _save(self): """Save the config file, only tests should use it for now.""" conf_dir = os.path.dirname(self._filename) ensure_config_dir_exists(conf_dir) f = file(self._filename, 'wb') try: self._get_config().write(f) finally: f.close() def _set_option(self, section_name, option_name, value): """Set an authentication configuration option""" conf = self._get_config() section = conf.get(section_name) if section is None: conf[section] = {} section = conf[section] section[option_name] = value self._save() def get_credentials(self, scheme, host, port=None, user=None, path=None, realm=None): """Returns the matching credentials from authentication.conf file. :param scheme: protocol :param host: the server address :param port: the associated port (optional) :param user: login (optional) :param path: the absolute path on the server (optional) :param realm: the http authentication realm (optional) :return: A dict containing the matching credentials or None. 
This includes: - name: the section name of the credentials in the authentication.conf file, - user: can't be different from the provided user if any, - scheme: the server protocol, - host: the server address, - port: the server port (can be None), - path: the absolute server path (can be None), - realm: the http specific authentication realm (can be None), - password: the decoded password, could be None if the credential defines only the user - verify_certificates: https specific, True if the server certificate should be verified, False otherwise. """ credentials = None for auth_def_name, auth_def in self._get_config().items(): if type(auth_def) is not configobj.Section: raise ValueError("%s defined outside a section" % auth_def_name) a_scheme, a_host, a_user, a_path = map( auth_def.get, ['scheme', 'host', 'user', 'path']) try: a_port = auth_def.as_int('port') except KeyError: a_port = None except ValueError: raise ValueError("'port' not numeric in %s" % auth_def_name) try: a_verify_certificates = auth_def.as_bool('verify_certificates') except KeyError: a_verify_certificates = True except ValueError: raise ValueError( "'verify_certificates' not boolean in %s" % auth_def_name) # Attempt matching if a_scheme is not None and scheme != a_scheme: continue if a_host is not None: if not (host == a_host or (a_host.startswith('.') and host.endswith(a_host))): continue if a_port is not None and port != a_port: continue if (a_path is not None and path is not None and not path.startswith(a_path)): continue if (a_user is not None and user is not None and a_user != user): # Never contradict the caller about the user to be used continue if a_user is None: # Can't find a user continue # Prepare a credentials dictionary with additional keys # for the credential providers credentials = dict(name=auth_def_name, user=a_user, scheme=a_scheme, host=host, port=port, path=path, realm=realm, password=auth_def.get('password', None), verify_certificates=a_verify_certificates) # Decode the password in the credentials (or get one) self.decode_password(credentials, auth_def.get('password_encoding', None)) if 'auth' in debug.debug_flags: trace.mutter("Using authentication section: %r", auth_def_name) break if credentials is None: # No credentials were found in authentication.conf, try the fallback # credentials stores. credentials = credential_store_registry.get_fallback_credentials( scheme, host, port, user, path, realm) return credentials def set_credentials(self, name, host, user, scheme=None, password=None, port=None, path=None, verify_certificates=None, realm=None): """Set authentication credentials for a host. Any existing credentials with matching scheme, host, port and path will be deleted, regardless of name. :param name: An arbitrary name to describe this set of credentials. :param host: Name of the host that accepts these credentials. :param user: The username portion of these credentials. :param scheme: The URL scheme (e.g. ssh, http) the credentials apply to. :param password: Password portion of these credentials. :param port: The IP port on the host that these credentials apply to. :param path: A filesystem path on the host that these credentials apply to. :param verify_certificates: On https, verify server certificates if True. :param realm: The http authentication realm (optional). 
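
        A minimal illustrative call (every value shown is hypothetical; note
        that this writes the credentials to the user's authentication.conf)::

            conf = AuthenticationConfig()
            conf.set_credentials('example', 'bazaar.example.com', 'jane',
                                 scheme='https', password='secret', port=443)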
""" values = {'host': host, 'user': user} if password is not None: values['password'] = password if scheme is not None: values['scheme'] = scheme if port is not None: values['port'] = '%d' % port if path is not None: values['path'] = path if verify_certificates is not None: values['verify_certificates'] = str(verify_certificates) if realm is not None: values['realm'] = realm config = self._get_config() for_deletion = [] for section, existing_values in config.items(): for key in ('scheme', 'host', 'port', 'path', 'realm'): if existing_values.get(key) != values.get(key): break else: del config[section] config.update({name: values}) self._save() def get_user(self, scheme, host, port=None, realm=None, path=None, prompt=None, ask=False, default=None): """Get a user from authentication file. :param scheme: protocol :param host: the server address :param port: the associated port (optional) :param realm: the realm sent by the server (optional) :param path: the absolute path on the server (optional) :param ask: Ask the user if there is no explicitly configured username (optional) :param default: The username returned if none is defined (optional). :return: The found user. """ credentials = self.get_credentials(scheme, host, port, user=None, path=path, realm=realm) if credentials is not None: user = credentials['user'] else: user = None if user is None: if ask: if prompt is None: # Create a default prompt suitable for most cases prompt = u'%s' % (scheme.upper(),) + u' %(host)s username' # Special handling for optional fields in the prompt if port is not None: prompt_host = '%s:%d' % (host, port) else: prompt_host = host user = ui.ui_factory.get_username(prompt, host=prompt_host) else: user = default return user def get_password(self, scheme, host, user, port=None, realm=None, path=None, prompt=None): """Get a password from authentication file or prompt the user for one. :param scheme: protocol :param host: the server address :param port: the associated port (optional) :param user: login :param realm: the realm sent by the server (optional) :param path: the absolute path on the server (optional) :return: The found password or the one entered by the user. """ credentials = self.get_credentials(scheme, host, port, user, path, realm) if credentials is not None: password = credentials['password'] if password is not None and scheme is 'ssh': trace.warning('password ignored in section [%s],' ' use an ssh agent instead' % credentials['name']) password = None else: password = None # Prompt user only if we could't find a password if password is None: if prompt is None: # Create a default prompt suitable for most cases prompt = u'%s' % scheme.upper() + u' %(user)s@%(host)s password' # Special handling for optional fields in the prompt if port is not None: prompt_host = '%s:%d' % (host, port) else: prompt_host = host password = ui.ui_factory.get_password(prompt, host=prompt_host, user=user) return password def decode_password(self, credentials, encoding): try: cs = credential_store_registry.get_credential_store(encoding) except KeyError: raise ValueError('%r is not a known password_encoding' % encoding) credentials['password'] = cs.decode_password(credentials) return credentials class CredentialStoreRegistry(registry.Registry): """A class that registers credential stores. A credential store provides access to credentials via the password_encoding field in authentication.conf sections. 
Except for stores provided by bzr itself, most stores are expected to be provided by plugins that will therefore use register_lazy(password_encoding, module_name, member_name, help=help, fallback=fallback) to install themselves. A fallback credential store is one that is queried if no credentials can be found via authentication.conf. """ def get_credential_store(self, encoding=None): cs = self.get(encoding) if callable(cs): cs = cs() return cs def is_fallback(self, name): """Check if the named credentials store should be used as fallback.""" return self.get_info(name) def get_fallback_credentials(self, scheme, host, port=None, user=None, path=None, realm=None): """Request credentials from all fallback credentials stores. The first credentials store that can provide credentials wins. """ credentials = None for name in self.keys(): if not self.is_fallback(name): continue cs = self.get_credential_store(name) credentials = cs.get_credentials(scheme, host, port, user, path, realm) if credentials is not None: # We found some credentials break return credentials def register(self, key, obj, help=None, override_existing=False, fallback=False): """Register a new object to a name. :param key: This is the key to use to request the object later. :param obj: The object to register. :param help: Help text for this entry. This may be a string or a callable. If it is a callable, it should take two parameters (registry, key): this registry and the key that the help was registered under. :param override_existing: Raise KeyErorr if False and something has already been registered for that key. If True, ignore if there is an existing key (always register the new value). :param fallback: Whether this credential store should be used as fallback. """ return super(CredentialStoreRegistry, self).register(key, obj, help, info=fallback, override_existing=override_existing) def register_lazy(self, key, module_name, member_name, help=None, override_existing=False, fallback=False): """Register a new credential store to be loaded on request. :param module_name: The python path to the module. Such as 'os.path'. :param member_name: The member of the module to return. If empty or None, get() will return the module itself. :param help: Help text for this entry. This may be a string or a callable. :param override_existing: If True, replace the existing object with the new one. If False, if there is already something registered with the same key, raise a KeyError :param fallback: Whether this credential store should be used as fallback. """ return super(CredentialStoreRegistry, self).register_lazy( key, module_name, member_name, help, info=fallback, override_existing=override_existing) credential_store_registry = CredentialStoreRegistry() class CredentialStore(object): """An abstract class to implement storage for credentials""" def decode_password(self, credentials): """Returns a clear text password for the provided credentials.""" raise NotImplementedError(self.decode_password) def get_credentials(self, scheme, host, port=None, user=None, path=None, realm=None): """Return the matching credentials from this credential store. This method is only called on fallback credential stores. 
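        A fallback store sketch (the class name and its behaviour are purely
        illustrative)::

            class ExampleCredentialStore(CredentialStore):

                def decode_password(self, credentials):
                    return credentials['password']

                def get_credentials(self, scheme, host, port=None, user=None,
                                    path=None, realm=None):
                    # Return a credentials dict (same shape as
                    # AuthenticationConfig.get_credentials) or None if this
                    # store knows nothing about the host.
                    return None

            credential_store_registry.register(
                'example', ExampleCredentialStore, fallback=True)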
""" raise NotImplementedError(self.get_credentials) class PlainTextCredentialStore(CredentialStore): __doc__ = """Plain text credential store for the authentication.conf file""" def decode_password(self, credentials): """See CredentialStore.decode_password.""" return credentials['password'] credential_store_registry.register('plain', PlainTextCredentialStore, help=PlainTextCredentialStore.__doc__) credential_store_registry.default_key = 'plain' class Base64CredentialStore(CredentialStore): __doc__ = """Base64 credential store for the authentication.conf file""" def decode_password(self, credentials): """See CredentialStore.decode_password.""" # GZ 2012-07-28: Will raise binascii.Error if password is not base64, # should probably propogate as something more useful. return base64.decodestring(credentials['password']) credential_store_registry.register('base64', Base64CredentialStore, help=Base64CredentialStore.__doc__) class BzrDirConfig(object): def __init__(self, bzrdir): self._bzrdir = bzrdir self._config = bzrdir._get_config() def set_default_stack_on(self, value): """Set the default stacking location. It may be set to a location, or None. This policy affects all branches contained by this control dir, except for those under repositories. """ if self._config is None: raise errors.BzrError("Cannot set configuration in %s" % self._bzrdir) if value is None: self._config.set_option('', 'default_stack_on') else: self._config.set_option(value, 'default_stack_on') def get_default_stack_on(self): """Return the default stacking location. This will either be a location, or None. This policy affects all branches contained by this control dir, except for those under repositories. """ if self._config is None: return None value = self._config.get_option('default_stack_on') if value == '': value = None return value class TransportConfig(object): """A Config that reads/writes a config file on a Transport. It is a low-level object that considers config data to be name/value pairs that may be associated with a section. Assigning meaning to these values is done at higher levels like TreeConfig. """ def __init__(self, transport, filename): self._transport = transport self._filename = filename def get_option(self, name, section=None, default=None): """Return the value associated with a named option. :param name: The name of the value :param section: The section the option is in (if any) :param default: The value to return if the value is not set :return: The value or default value """ configobj = self._get_configobj() if section is None: section_obj = configobj else: try: section_obj = configobj[section] except KeyError: return default value = section_obj.get(name, default) for hook in OldConfigHooks['get']: hook(self, name, value) return value def set_option(self, value, name, section=None): """Set the value associated with a named option. 
:param value: The value to set :param name: The name of the value to set :param section: The section the option is in (if any) """ configobj = self._get_configobj() if section is None: configobj[name] = value else: configobj.setdefault(section, {})[name] = value for hook in OldConfigHooks['set']: hook(self, name, value) self._set_configobj(configobj) def remove_option(self, option_name, section_name=None): configobj = self._get_configobj() if section_name is None: del configobj[option_name] else: del configobj[section_name][option_name] for hook in OldConfigHooks['remove']: hook(self, option_name) self._set_configobj(configobj) def _get_config_file(self): try: f = StringIO(self._transport.get_bytes(self._filename)) for hook in OldConfigHooks['load']: hook(self) return f except errors.NoSuchFile: return StringIO() except errors.PermissionDenied, e: trace.warning("Permission denied while trying to open " "configuration file %s.", urlutils.unescape_for_display( urlutils.join(self._transport.base, self._filename), "utf-8")) return StringIO() def _external_url(self): return urlutils.join(self._transport.external_url(), self._filename) def _get_configobj(self): f = self._get_config_file() try: try: conf = ConfigObj(f, encoding='utf-8') except configobj.ConfigObjError, e: raise errors.ParseConfigError(e.errors, self._external_url()) except UnicodeDecodeError: raise errors.ConfigContentError(self._external_url()) finally: f.close() return conf def _set_configobj(self, configobj): out_file = StringIO() configobj.write(out_file) out_file.seek(0) self._transport.put_file(self._filename, out_file) for hook in OldConfigHooks['save']: hook(self) class Option(object): """An option definition. The option *values* are stored in config files and found in sections. Here we define various properties about the option itself, its default value, how to convert it from stores, what to do when invalid values are encoutered, in which config files it can be stored. """ def __init__(self, name, override_from_env=None, default=None, default_from_env=None, help=None, from_unicode=None, invalid=None, unquote=True): """Build an option definition. :param name: the name used to refer to the option. :param override_from_env: A list of environment variables which can provide override any configuration setting. :param default: the default value to use when none exist in the config stores. This is either a string that ``from_unicode`` will convert into the proper type, a callable returning a unicode string so that ``from_unicode`` can be used on the return value, or a python object that can be stringified (so only the empty list is supported for example). :param default_from_env: A list of environment variables which can provide a default value. 'default' will be used only if none of the variables specified here are set in the environment. :param help: a doc string to explain the option to the user. :param from_unicode: a callable to convert the unicode string representing the option value in a store or its default value. :param invalid: the action to be taken when an invalid value is encountered in a store. This is called only when from_unicode is invoked to convert a string and returns None or raise ValueError or TypeError. Accepted values are: None (ignore invalid values), 'warning' (emit a warning), 'error' (emit an error message and terminates). :param unquote: should the unicode value be unquoted before conversion. 
This should be used only when the store providing the values cannot safely unquote them (see http://pad.lv/906897). It is provided so daughter classes can handle the quoting themselves. """ if override_from_env is None: override_from_env = [] if default_from_env is None: default_from_env = [] self.name = name self.override_from_env = override_from_env # Convert the default value to a unicode string so all values are # strings internally before conversion (via from_unicode) is attempted. if default is None: self.default = None elif isinstance(default, list): # Only the empty list is supported if default: raise AssertionError( 'Only empty lists are supported as default values') self.default = u',' elif isinstance(default, (str, unicode, bool, int, float)): # Rely on python to convert strings, booleans and integers self.default = u'%s' % (default,) elif callable(default): self.default = default else: # other python objects are not expected raise AssertionError('%r is not supported as a default value' % (default,)) self.default_from_env = default_from_env self._help = help self.from_unicode = from_unicode self.unquote = unquote if invalid and invalid not in ('warning', 'error'): raise AssertionError("%s not supported for 'invalid'" % (invalid,)) self.invalid = invalid @property def help(self): return self._help def convert_from_unicode(self, store, unicode_value): if self.unquote and store is not None and unicode_value is not None: unicode_value = store.unquote(unicode_value) if self.from_unicode is None or unicode_value is None: # Don't convert or nothing to convert return unicode_value try: converted = self.from_unicode(unicode_value) except (ValueError, TypeError): # Invalid values are ignored converted = None if converted is None and self.invalid is not None: # The conversion failed if self.invalid == 'warning': trace.warning('Value "%s" is not valid for "%s"', unicode_value, self.name) elif self.invalid == 'error': raise errors.ConfigOptionValueError(self.name, unicode_value) return converted def get_override(self): value = None for var in self.override_from_env: try: # If the env variable is defined, its value takes precedence value = os.environ[var].decode(osutils.get_user_encoding()) break except KeyError: continue return value def get_default(self): value = None for var in self.default_from_env: try: # If the env variable is defined, its value is the default one value = os.environ[var].decode(osutils.get_user_encoding()) break except KeyError: continue if value is None: # Otherwise, fallback to the value defined at registration if callable(self.default): value = self.default() if not isinstance(value, unicode): raise AssertionError( "Callable default value for '%s' should be unicode" % (self.name)) else: value = self.default return value def get_help_topic(self): return self.name def get_help_text(self, additional_see_also=None, plain=True): result = self.help from bzrlib import help_topics result += help_topics._format_see_also(additional_see_also) if plain: result = help_topics.help_as_plain_text(result) return result # Predefined converters to get proper values from store def bool_from_store(unicode_str): return ui.bool_from_string(unicode_str) def int_from_store(unicode_str): return int(unicode_str) _unit_suffixes = dict(K=10**3, M=10**6, G=10**9) def int_SI_from_store(unicode_str): """Convert a human readable size in SI units, e.g 10MB into an integer. Accepted suffixes are K,M,G. It is case-insensitive and may be followed by a trailing b (i.e. Kb, MB). 
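    A sketch of the expected conversions::

        int_SI_from_store(u'5')      # -> 5
        int_SI_from_store(u'10MB')   # -> 10000000
        int_SI_from_store(u'1k')     # -> 1000
        int_SI_from_store(u'foo')    # -> None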
This is intended to be practical and not pedantic. :return Integer, expanded to its base-10 value if a proper SI unit is found, None otherwise. """ regexp = "^(\d+)(([" + ''.join(_unit_suffixes) + "])b?)?$" p = re.compile(regexp, re.IGNORECASE) m = p.match(unicode_str) val = None if m is not None: val, _, unit = m.groups() val = int(val) if unit: try: coeff = _unit_suffixes[unit.upper()] except KeyError: raise ValueError(gettext('{0} is not an SI unit.').format(unit)) val *= coeff return val def float_from_store(unicode_str): return float(unicode_str) # Use an empty dict to initialize an empty configobj avoiding all parsing and # encoding checks _list_converter_config = configobj.ConfigObj( {}, encoding='utf-8', list_values=True, interpolation=False) class ListOption(Option): def __init__(self, name, default=None, default_from_env=None, help=None, invalid=None): """A list Option definition. This overrides the base class so the conversion from a unicode string can take quoting into account. """ super(ListOption, self).__init__( name, default=default, default_from_env=default_from_env, from_unicode=self.from_unicode, help=help, invalid=invalid, unquote=False) def from_unicode(self, unicode_str): if not isinstance(unicode_str, basestring): raise TypeError # Now inject our string directly as unicode. All callers got their # value from configobj, so values that need to be quoted are already # properly quoted. _list_converter_config.reset() _list_converter_config._parse([u"list=%s" % (unicode_str,)]) maybe_list = _list_converter_config['list'] if isinstance(maybe_list, basestring): if maybe_list: # A single value, most probably the user forgot (or didn't care # to add) the final ',' l = [maybe_list] else: # The empty string, convert to empty list l = [] else: # We rely on ConfigObj providing us with a list already l = maybe_list return l class RegistryOption(Option): """Option for a choice from a registry.""" def __init__(self, name, registry, default_from_env=None, help=None, invalid=None): """A registry based Option definition. This overrides the base class so the conversion from a unicode string can take quoting into account. """ super(RegistryOption, self).__init__( name, default=lambda: unicode(registry.default_key), default_from_env=default_from_env, from_unicode=self.from_unicode, help=help, invalid=invalid, unquote=False) self.registry = registry def from_unicode(self, unicode_str): if not isinstance(unicode_str, basestring): raise TypeError try: return self.registry.get(unicode_str) except KeyError: raise ValueError( "Invalid value %s for %s." "See help for a list of possible values." % (unicode_str, self.name)) @property def help(self): ret = [self._help, "\n\nThe following values are supported:\n"] for key in self.registry.keys(): ret.append(" %s - %s\n" % (key, self.registry.get_help(key))) return "".join(ret) _option_ref_re = lazy_regex.lazy_compile('({[^\d\W](?:\.\w|-\w|\w)*})') """Describes an expandable option reference. We want to match the most embedded reference first. I.e. for '{{foo}}' we will get '{foo}', for '{bar{baz}}' we will get '{baz}' """ def iter_option_refs(string): # Split isolate refs so every other chunk is a ref is_ref = False for chunk in _option_ref_re.split(string): yield is_ref, chunk is_ref = not is_ref class OptionRegistry(registry.Registry): """Register config options by their name. This overrides ``registry.Registry`` to simplify registration by acquiring some information from the option object itself. 
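    A registration sketch (the option name and help text are illustrative)::

        option_registry.register(
            Option('example_option', default=False,
                   from_unicode=bool_from_store,
                   help='An illustrative boolean option.'))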
""" def _check_option_name(self, option_name): """Ensures an option name is valid. :param option_name: The name to validate. """ if _option_ref_re.match('{%s}' % option_name) is None: raise errors.IllegalOptionName(option_name) def register(self, option): """Register a new option to its name. :param option: The option to register. Its name is used as the key. """ self._check_option_name(option.name) super(OptionRegistry, self).register(option.name, option, help=option.help) def register_lazy(self, key, module_name, member_name): """Register a new option to be loaded on request. :param key: the key to request the option later. Since the registration is lazy, it should be provided and match the option name. :param module_name: the python path to the module. Such as 'os.path'. :param member_name: the member of the module to return. If empty or None, get() will return the module itself. """ self._check_option_name(key) super(OptionRegistry, self).register_lazy(key, module_name, member_name) def get_help(self, key=None): """Get the help text associated with the given key""" option = self.get(key) the_help = option.help if callable(the_help): return the_help(self, key) return the_help option_registry = OptionRegistry() # Registered options in lexicographical order option_registry.register( Option('append_revisions_only', default=None, from_unicode=bool_from_store, invalid='warning', help='''\ Whether to only append revisions to the mainline. If this is set to true, then it is not possible to change the existing mainline of the branch. ''')) option_registry.register( ListOption('acceptable_keys', default=None, help="""\ List of GPG key patterns which are acceptable for verification. """)) option_registry.register( Option('add.maximum_file_size', default=u'20MB', from_unicode=int_SI_from_store, help="""\ Size above which files should be added manually. Files below this size are added automatically when using ``bzr add`` without arguments. A negative value means disable the size check. """)) option_registry.register( Option('bound', default=None, from_unicode=bool_from_store, help="""\ Is the branch bound to ``bound_location``. If set to "True", the branch should act as a checkout, and push each commit to the bound_location. This option is normally set by ``bind``/``unbind``. See also: bound_location. """)) option_registry.register( Option('bound_location', default=None, help="""\ The location that commits should go to when acting as a checkout. This option is normally set by ``bind``. See also: bound. """)) option_registry.register( Option('branch.fetch_tags', default=False, from_unicode=bool_from_store, help="""\ Whether revisions associated with tags should be fetched. """)) option_registry.register_lazy( 'bzr.transform.orphan_policy', 'bzrlib.transform', 'opt_transform_orphan') option_registry.register( Option('bzr.workingtree.worth_saving_limit', default=10, from_unicode=int_from_store, invalid='warning', help='''\ How many changes before saving the dirstate. -1 means that we will never rewrite the dirstate file for only stat-cache changes. Regardless of this setting, we will always rewrite the dirstate file if a file is added/removed/renamed/etc. This flag only affects the behavior of updating the dirstate file after we notice that a file has been touched. ''')) option_registry.register( Option('bugtracker', default=None, help='''\ Default bug tracker to use. 
This bug tracker will be used for example when marking bugs as fixed using ``bzr commit --fixes``, if no explicit bug tracker was specified. ''')) option_registry.register( Option('check_signatures', default=CHECK_IF_POSSIBLE, from_unicode=signature_policy_from_unicode, help='''\ GPG checking policy. Possible values: require, ignore, check-available (default) this option will control whether bzr will require good gpg signatures, ignore them, or check them if they are present. ''')) option_registry.register( Option('child_submit_format', help='''The preferred format of submissions to this branch.''')) option_registry.register( Option('child_submit_to', help='''Where submissions to this branch are mailed to.''')) option_registry.register( Option('create_signatures', default=SIGN_WHEN_REQUIRED, from_unicode=signing_policy_from_unicode, help='''\ GPG Signing policy. Possible values: always, never, when-required (default) This option controls whether bzr will always create gpg signatures or not on commits. ''')) option_registry.register( Option('dirstate.fdatasync', default=True, from_unicode=bool_from_store, help='''\ Flush dirstate changes onto physical disk? If true (default), working tree metadata changes are flushed through the OS buffers to physical disk. This is somewhat slower, but means data should not be lost if the machine crashes. See also repository.fdatasync. ''')) option_registry.register( ListOption('debug_flags', default=[], help='Debug flags to activate.')) option_registry.register( Option('default_format', default='2a', help='Format used when creating branches.')) option_registry.register( Option('dpush_strict', default=None, from_unicode=bool_from_store, help='''\ The default value for ``dpush --strict``. If present, defines the ``--strict`` option default value for checking uncommitted changes before pushing into a different VCS without any custom bzr metadata. ''')) option_registry.register( Option('editor', help='The command called to launch an editor to enter a message.')) option_registry.register( Option('email', override_from_env=['BZR_EMAIL'], default=default_email, help='The users identity')) option_registry.register( Option('gpg_signing_command', default='gpg', help="""\ Program to use use for creating signatures. This should support at least the -u and --clearsign options. """)) option_registry.register( Option('gpg_signing_key', default=None, help="""\ GPG key to use for signing. This defaults to the first key associated with the users email. """)) option_registry.register( Option('ignore_missing_extensions', default=False, from_unicode=bool_from_store, help='''\ Control the missing extensions warning display. The warning will not be emitted if set to True. ''')) option_registry.register( Option('language', help='Language to translate messages into.')) option_registry.register( Option('locks.steal_dead', default=False, from_unicode=bool_from_store, help='''\ Steal locks that appears to be dead. If set to True, bzr will check if a lock is supposed to be held by an active process from the same user on the same machine. If the user and machine match, but no process with the given PID is active, then bzr will automatically break the stale lock, and create a new lock for this process. Otherwise, bzr will prompt as normal to break the lock. ''')) option_registry.register( Option('log_format', default='long', help= '''\ Log format to use when displaying revisions. Standard log formats are ``long``, ``short`` and ``line``. Additional formats may be provided by plugins. 
''')) option_registry.register_lazy('mail_client', 'bzrlib.mail_client', 'opt_mail_client') option_registry.register( Option('output_encoding', help= 'Unicode encoding for output' ' (terminal encoding if not specified).')) option_registry.register( Option('parent_location', default=None, help="""\ The location of the default branch for pull or merge. This option is normally set when creating a branch, the first ``pull`` or by ``pull --remember``. """)) option_registry.register( Option('post_commit', default=None, help='''\ Post commit functions. An ordered list of python functions to call, separated by spaces. Each function takes branch, rev_id as parameters. ''')) option_registry.register_lazy('progress_bar', 'bzrlib.ui.text', 'opt_progress_bar') option_registry.register( Option('public_branch', default=None, help="""\ A publically-accessible version of this branch. This implies that the branch setting this option is not publically-accessible. Used and set by ``bzr send``. """)) option_registry.register( Option('push_location', default=None, help="""\ The location of the default branch for push. This option is normally set by the first ``push`` or ``push --remember``. """)) option_registry.register( Option('push_strict', default=None, from_unicode=bool_from_store, help='''\ The default value for ``push --strict``. If present, defines the ``--strict`` option default value for checking uncommitted changes before sending a merge directive. ''')) option_registry.register( Option('repository.fdatasync', default=True, from_unicode=bool_from_store, help='''\ Flush repository changes onto physical disk? If true (default), repository changes are flushed through the OS buffers to physical disk. This is somewhat slower, but means data should not be lost if the machine crashes. See also dirstate.fdatasync. ''')) option_registry.register_lazy('smtp_server', 'bzrlib.smtp_connection', 'smtp_server') option_registry.register_lazy('smtp_password', 'bzrlib.smtp_connection', 'smtp_password') option_registry.register_lazy('smtp_username', 'bzrlib.smtp_connection', 'smtp_username') option_registry.register( Option('selftest.timeout', default='600', from_unicode=int_from_store, help='Abort selftest if one test takes longer than this many seconds', )) option_registry.register( Option('send_strict', default=None, from_unicode=bool_from_store, help='''\ The default value for ``send --strict``. If present, defines the ``--strict`` option default value for checking uncommitted changes before sending a bundle. ''')) option_registry.register( Option('serve.client_timeout', default=300.0, from_unicode=float_from_store, help="If we wait for a new request from a client for more than" " X seconds, consider the client idle, and hangup.")) option_registry.register( Option('stacked_on_location', default=None, help="""The location where this branch is stacked on.""")) option_registry.register( Option('submit_branch', default=None, help="""\ The branch you intend to submit your current work to. This is automatically set by ``bzr send`` and ``bzr merge``, and is also used by the ``submit:`` revision spec. 
""")) option_registry.register( Option('submit_to', help='''Where submissions from this branch are mailed to.''')) option_registry.register( ListOption('suppress_warnings', default=[], help="List of warning classes to suppress.")) option_registry.register( Option('validate_signatures_in_log', default=False, from_unicode=bool_from_store, invalid='warning', help='''Whether to validate signatures in bzr log.''')) option_registry.register_lazy('ssl.ca_certs', 'bzrlib.transport.http._urllib2_wrappers', 'opt_ssl_ca_certs') option_registry.register_lazy('ssl.cert_reqs', 'bzrlib.transport.http._urllib2_wrappers', 'opt_ssl_cert_reqs') class Section(object): """A section defines a dict of option name => value. This is merely a read-only dict which can add some knowledge about the options. It is *not* a python dict object though and doesn't try to mimic its API. """ def __init__(self, section_id, options): self.id = section_id # We re-use the dict-like object received self.options = options def get(self, name, default=None, expand=True): return self.options.get(name, default) def iter_option_names(self): for k in self.options.iterkeys(): yield k def __repr__(self): # Mostly for debugging use return "" % (self.__class__.__name__, self.id) _NewlyCreatedOption = object() """Was the option created during the MutableSection lifetime""" _DeletedOption = object() """Was the option deleted during the MutableSection lifetime""" class MutableSection(Section): """A section allowing changes and keeping track of the original values.""" def __init__(self, section_id, options): super(MutableSection, self).__init__(section_id, options) self.reset_changes() def set(self, name, value): if name not in self.options: # This is a new option self.orig[name] = _NewlyCreatedOption elif name not in self.orig: self.orig[name] = self.get(name, None) self.options[name] = value def remove(self, name): if name not in self.orig and name in self.options: self.orig[name] = self.get(name, None) del self.options[name] def reset_changes(self): self.orig = {} def apply_changes(self, dirty, store): """Apply option value changes. ``self`` has been reloaded from the persistent storage. ``dirty`` contains the changes made since the previous loading. :param dirty: the mutable section containing the changes. :param store: the store containing the section """ for k, expected in dirty.orig.iteritems(): actual = dirty.get(k, _DeletedOption) reloaded = self.get(k, _NewlyCreatedOption) if actual is _DeletedOption: if k in self.options: self.remove(k) else: self.set(k, actual) # Report concurrent updates in an ad-hoc way. This should only # occurs when different processes try to update the same option # which is not supported (as in: the config framework is not meant # to be used as a sharing mechanism). if expected != reloaded: if actual is _DeletedOption: actual = '' if reloaded is _NewlyCreatedOption: reloaded = '' if expected is _NewlyCreatedOption: expected = '' # Someone changed the value since we get it from the persistent # storage. trace.warning(gettext( "Option {0} in section {1} of {2} was changed" " from {3} to {4}. The {5} value will be saved.".format( k, self.id, store.external_url(), expected, reloaded, actual))) # No need to keep track of these changes self.reset_changes() class Store(object): """Abstract interface to persistent storage for configuration options.""" readonly_section_class = Section mutable_section_class = MutableSection def __init__(self): # Which sections need to be saved (by section id). 
        # We use a dict here so the dirty sections can be shared by multiple
        # callers.
        self.dirty_sections = {}

    def is_loaded(self):
        """Returns True if the Store has been loaded.

        This is used to implement lazy loading and ensure the persistent
        storage is queried only when needed.
        """
        raise NotImplementedError(self.is_loaded)

    def load(self):
        """Loads the Store from persistent storage."""
        raise NotImplementedError(self.load)

    def _load_from_string(self, bytes):
        """Create a store from a string in configobj syntax.

        :param bytes: A string representing the file content.
        """
        raise NotImplementedError(self._load_from_string)

    def unload(self):
        """Unloads the Store.

        This should make is_loaded() return False. This is used when the
        caller knows that the persistent storage has changed or may have
        changed since the last load.
        """
        raise NotImplementedError(self.unload)

    def quote(self, value):
        """Quote a configuration option value for storing purposes.

        This allows Stacks to present values as they will be stored.
        """
        return value

    def unquote(self, value):
        """Unquote a configuration option value into unicode.

        The received value is quoted as stored.
        """
        return value

    def save(self):
        """Saves the Store to persistent storage."""
        raise NotImplementedError(self.save)

    def _need_saving(self):
        for s in self.dirty_sections.values():
            if s.orig:
                # At least one dirty section contains a modification
                return True
        return False

    def apply_changes(self, dirty_sections):
        """Apply changes from dirty sections while checking for coherency.

        The Store content is discarded and reloaded from persistent storage
        to acquire up-to-date values.

        Dirty sections are MutableSection which kept track of the value they
        are expected to update.
        """
        # We need an up-to-date version from the persistent storage, unload
        # the store. The reload will occur when needed (triggered by the
        # first get_mutable_section() call below).
        self.unload()
        # Apply the changes from the preserved dirty sections
        for section_id, dirty in dirty_sections.iteritems():
            clean = self.get_mutable_section(section_id)
            clean.apply_changes(dirty, self)
        # Everything is clean now
        self.dirty_sections = {}

    def save_changes(self):
        """Saves the Store to persistent storage if changes occurred.

        Apply the changes recorded in the mutable sections to a store content
        refreshed from persistent storage.
        """
        raise NotImplementedError(self.save_changes)

    def external_url(self):
        raise NotImplementedError(self.external_url)

    def get_sections(self):
        """Returns an ordered iterable of existing sections.

        :returns: An iterable of (store, section).
        """
        raise NotImplementedError(self.get_sections)

    def get_mutable_section(self, section_id=None):
        """Returns the specified mutable section.

        :param section_id: The section identifier
        """
        raise NotImplementedError(self.get_mutable_section)

    def __repr__(self):
        # Mostly for debugging use
        return "<config.%s(%s)>" % (self.__class__.__name__,
                                    self.external_url())


class CommandLineStore(Store):
    """A store to carry command line overrides for the config options."""

    def __init__(self, opts=None):
        super(CommandLineStore, self).__init__()
        if opts is None:
            opts = {}
        self.options = {}
        self.id = 'cmdline'

    def _reset(self):
        # The dict should be cleared but not replaced so it can be shared.
self.options.clear() def _from_cmdline(self, overrides): # Reset before accepting new definitions self._reset() for over in overrides: try: name, value = over.split('=', 1) except ValueError: raise errors.BzrCommandError( gettext("Invalid '%s', should be of the form 'name=value'") % (over,)) self.options[name] = value def external_url(self): # Not an url but it makes debugging easier and is never needed # otherwise return 'cmdline' def get_sections(self): yield self, self.readonly_section_class(None, self.options) class IniFileStore(Store): """A config Store using ConfigObj for storage. :ivar _config_obj: Private member to hold the ConfigObj instance used to serialize/deserialize the config file. """ def __init__(self): """A config Store using ConfigObj for storage. """ super(IniFileStore, self).__init__() self._config_obj = None def is_loaded(self): return self._config_obj != None def unload(self): self._config_obj = None self.dirty_sections = {} def _load_content(self): """Load the config file bytes. This should be provided by subclasses :return: Byte string """ raise NotImplementedError(self._load_content) def _save_content(self, content): """Save the config file bytes. This should be provided by subclasses :param content: Config file bytes to write """ raise NotImplementedError(self._save_content) def load(self): """Load the store from the associated file.""" if self.is_loaded(): return content = self._load_content() self._load_from_string(content) for hook in ConfigHooks['load']: hook(self) def _load_from_string(self, bytes): """Create a config store from a string. :param bytes: A string representing the file content. """ if self.is_loaded(): raise AssertionError('Already loaded: %r' % (self._config_obj,)) co_input = StringIO(bytes) try: # The config files are always stored utf8-encoded self._config_obj = ConfigObj(co_input, encoding='utf-8', list_values=False) except configobj.ConfigObjError, e: self._config_obj = None raise errors.ParseConfigError(e.errors, self.external_url()) except UnicodeDecodeError: raise errors.ConfigContentError(self.external_url()) def save_changes(self): if not self.is_loaded(): # Nothing to save return if not self._need_saving(): return # Preserve the current version dirty_sections = dict(self.dirty_sections.items()) self.apply_changes(dirty_sections) # Save to the persistent storage self.save() def save(self): if not self.is_loaded(): # Nothing to save return out = StringIO() self._config_obj.write(out) self._save_content(out.getvalue()) for hook in ConfigHooks['save']: hook(self) def get_sections(self): """Get the configobj section in the file order. :returns: An iterable of (store, section). 
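        A small sketch of iterating an in-memory store (the content is
        illustrative)::

            store = IniFileStore()
            store._load_from_string('foo=bar')
            for store_, section in store.get_sections():
                section.get('foo')   # -> 'bar' in the no-name section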
""" # We need a loaded store try: self.load() except (errors.NoSuchFile, errors.PermissionDenied): # If the file can't be read, there is no sections return cobj = self._config_obj if cobj.scalars: yield self, self.readonly_section_class(None, cobj) for section_name in cobj.sections: yield (self, self.readonly_section_class(section_name, cobj[section_name])) def get_mutable_section(self, section_id=None): # We need a loaded store try: self.load() except errors.NoSuchFile: # The file doesn't exist, let's pretend it was empty self._load_from_string('') if section_id in self.dirty_sections: # We already created a mutable section for this id return self.dirty_sections[section_id] if section_id is None: section = self._config_obj else: section = self._config_obj.setdefault(section_id, {}) mutable_section = self.mutable_section_class(section_id, section) # All mutable sections can become dirty self.dirty_sections[section_id] = mutable_section return mutable_section def quote(self, value): try: # configobj conflates automagical list values and quoting self._config_obj.list_values = True return self._config_obj._quote(value) finally: self._config_obj.list_values = False def unquote(self, value): if value and isinstance(value, basestring): # _unquote doesn't handle None nor empty strings nor anything that # is not a string, really. value = self._config_obj._unquote(value) return value def external_url(self): # Since an IniFileStore can be used without a file (at least in tests), # it's better to provide something than raising a NotImplementedError. # All daughter classes are supposed to provide an implementation # anyway. return 'In-Process Store, no URL' class TransportIniFileStore(IniFileStore): """IniFileStore that loads files from a transport. :ivar transport: The transport object where the config file is located. :ivar file_name: The config file basename in the transport directory. """ def __init__(self, transport, file_name): """A Store using a ini file on a Transport :param transport: The transport object where the config file is located. :param file_name: The config file basename in the transport directory. """ super(TransportIniFileStore, self).__init__() self.transport = transport self.file_name = file_name def _load_content(self): try: return self.transport.get_bytes(self.file_name) except errors.PermissionDenied: trace.warning("Permission denied while trying to load " "configuration store %s.", self.external_url()) raise def _save_content(self, content): self.transport.put_bytes(self.file_name, content) def external_url(self): # FIXME: external_url should really accepts an optional relpath # parameter (bug #750169) :-/ -- vila 2011-04-04 # The following will do in the interim but maybe we don't want to # expose a path here but rather a config ID and its associated # object . return urlutils.join(self.transport.external_url(), self.file_name) # Note that LockableConfigObjStore inherits from ConfigObjStore because we need # unlockable stores for use with objects that can already ensure the locking # (think branches). If different stores (not based on ConfigObj) are created, # they may face the same issue. class LockableIniFileStore(TransportIniFileStore): """A ConfigObjStore using locks on save to ensure store integrity.""" def __init__(self, transport, file_name, lock_dir_name=None): """A config Store using ConfigObj for storage. :param transport: The transport object where the config file is located. :param file_name: The config file basename in the transport directory. 
""" if lock_dir_name is None: lock_dir_name = 'lock' self.lock_dir_name = lock_dir_name super(LockableIniFileStore, self).__init__(transport, file_name) self._lock = lockdir.LockDir(self.transport, self.lock_dir_name) def lock_write(self, token=None): """Takes a write lock in the directory containing the config file. If the directory doesn't exist it is created. """ # FIXME: This doesn't check the ownership of the created directories as # ensure_config_dir_exists does. It should if the transport is local # -- vila 2011-04-06 self.transport.create_prefix() return self._lock.lock_write(token) def unlock(self): self._lock.unlock() def break_lock(self): self._lock.break_lock() @needs_write_lock def save(self): # We need to be able to override the undecorated implementation self.save_without_locking() def save_without_locking(self): super(LockableIniFileStore, self).save() # FIXME: global, bazaar, shouldn't that be 'user' instead or even # 'user_defaults' as opposed to 'user_overrides', 'system_defaults' # (/etc/bzr/bazaar.conf) and 'system_overrides' ? -- vila 2011-04-05 # FIXME: Moreover, we shouldn't need classes for these stores either, factory # functions or a registry will make it easier and clearer for tests, focusing # on the relevant parts of the API that needs testing -- vila 20110503 (based # on a poolie's remark) class GlobalStore(LockableIniFileStore): """A config store for global options. There is a single GlobalStore for a given process. """ def __init__(self, possible_transports=None): t = transport.get_transport_from_path( config_dir(), possible_transports=possible_transports) super(GlobalStore, self).__init__(t, 'bazaar.conf') self.id = 'bazaar' class LocationStore(LockableIniFileStore): """A config store for options specific to a location. There is a single LocationStore for a given process. """ def __init__(self, possible_transports=None): t = transport.get_transport_from_path( config_dir(), possible_transports=possible_transports) super(LocationStore, self).__init__(t, 'locations.conf') self.id = 'locations' class BranchStore(TransportIniFileStore): """A config store for branch options. There is a single BranchStore for a given branch. """ def __init__(self, branch): super(BranchStore, self).__init__(branch.control_transport, 'branch.conf') self.branch = branch self.id = 'branch' class ControlStore(LockableIniFileStore): def __init__(self, bzrdir): super(ControlStore, self).__init__(bzrdir.transport, 'control.conf', lock_dir_name='branch_lock') self.id = 'control' class SectionMatcher(object): """Select sections into a given Store. This is intended to be used to postpone getting an iterable of sections from a store. """ def __init__(self, store): self.store = store def get_sections(self): # This is where we require loading the store so we can see all defined # sections. sections = self.store.get_sections() # Walk the revisions in the order provided for store, s in sections: if self.match(s): yield store, s def match(self, section): """Does the proposed section match. :param section: A Section object. :returns: True if the section matches, False otherwise. 
""" raise NotImplementedError(self.match) class NameMatcher(SectionMatcher): def __init__(self, store, section_id): super(NameMatcher, self).__init__(store) self.section_id = section_id def match(self, section): return section.id == self.section_id class LocationSection(Section): def __init__(self, section, extra_path, branch_name=None): super(LocationSection, self).__init__(section.id, section.options) self.extra_path = extra_path if branch_name is None: branch_name = '' self.locals = {'relpath': extra_path, 'basename': urlutils.basename(extra_path), 'branchname': branch_name} def get(self, name, default=None, expand=True): value = super(LocationSection, self).get(name, default) if value is not None and expand: policy_name = self.get(name + ':policy', None) policy = _policy_value.get(policy_name, POLICY_NONE) if policy == POLICY_APPENDPATH: value = urlutils.join(value, self.extra_path) # expand section local options right now (since POLICY_APPENDPATH # will never add options references, it's ok to expand after it). chunks = [] for is_ref, chunk in iter_option_refs(value): if not is_ref: chunks.append(chunk) else: ref = chunk[1:-1] if ref in self.locals: chunks.append(self.locals[ref]) else: chunks.append(chunk) value = ''.join(chunks) return value class StartingPathMatcher(SectionMatcher): """Select sections for a given location respecting the Store order.""" # FIXME: Both local paths and urls can be used for section names as well as # ``location`` to stay consistent with ``LocationMatcher`` which itself # inherited the fuzziness from the previous ``LocationConfig`` # implementation. We probably need to revisit which encoding is allowed for # both ``location`` and section names and how we normalize # them. http://pad.lv/85479, http://pad.lv/437009 and http://359320 are # related too. -- vila 2012-01-04 def __init__(self, store, location): super(StartingPathMatcher, self).__init__(store) if location.startswith('file://'): location = urlutils.local_path_from_url(location) self.location = location def get_sections(self): """Get all sections matching ``location`` in the store. The most generic sections are described first in the store, then more specific ones can be provided for reduced scopes. The returned section are therefore returned in the reversed order so the most specific ones can be found first. 
""" location_parts = self.location.rstrip('/').split('/') store = self.store # Later sections are more specific, they should be returned first for _, section in reversed(list(store.get_sections())): if section.id is None: # The no-name section is always included if present yield store, LocationSection(section, self.location) continue section_path = section.id if section_path.startswith('file://'): # the location is already a local path or URL, convert the # section id to the same format section_path = urlutils.local_path_from_url(section_path) if (self.location.startswith(section_path) or fnmatch.fnmatch(self.location, section_path)): section_parts = section_path.rstrip('/').split('/') extra_path = '/'.join(location_parts[len(section_parts):]) yield store, LocationSection(section, extra_path) class LocationMatcher(SectionMatcher): def __init__(self, store, location): super(LocationMatcher, self).__init__(store) url, params = urlutils.split_segment_parameters(location) if location.startswith('file://'): location = urlutils.local_path_from_url(location) self.location = location branch_name = params.get('branch') if branch_name is None: self.branch_name = urlutils.basename(self.location) else: self.branch_name = urlutils.unescape(branch_name) def _get_matching_sections(self): """Get all sections matching ``location``.""" # We slightly diverge from LocalConfig here by allowing the no-name # section as the most generic one and the lower priority. no_name_section = None all_sections = [] # Filter out the no_name_section so _iter_for_location_by_parts can be # used (it assumes all sections have a name). for _, section in self.store.get_sections(): if section.id is None: no_name_section = section else: all_sections.append(section) # Unfortunately _iter_for_location_by_parts deals with section names so # we have to resync. filtered_sections = _iter_for_location_by_parts( [s.id for s in all_sections], self.location) iter_all_sections = iter(all_sections) matching_sections = [] if no_name_section is not None: matching_sections.append( (0, LocationSection(no_name_section, self.location))) for section_id, extra_path, length in filtered_sections: # a section id is unique for a given store so it's safe to take the # first matching section while iterating. Also, all filtered # sections are part of 'all_sections' and will always be found # there. while True: section = iter_all_sections.next() if section_id == section.id: section = LocationSection(section, extra_path, self.branch_name) matching_sections.append((length, section)) break return matching_sections def get_sections(self): # Override the default implementation as we want to change the order matching_sections = self._get_matching_sections() # We want the longest (aka more specific) locations first sections = sorted(matching_sections, key=lambda (length, section): (length, section.id), reverse=True) # Sections mentioning 'ignore_parents' restrict the selection for _, section in sections: # FIXME: We really want to use as_bool below -- vila 2011-04-07 ignore = section.get('ignore_parents', None) if ignore is not None: ignore = ui.bool_from_string(ignore) if ignore: break # Finally, we have a valid section yield self.store, section # FIXME: _shared_stores should be an attribute of a library state once a # library_state object is always available. 
_shared_stores = {} _shared_stores_at_exit_installed = False class Stack(object): """A stack of configurations where an option can be defined""" def __init__(self, sections_def, store=None, mutable_section_id=None): """Creates a stack of sections with an optional store for changes. :param sections_def: A list of Section or callables that returns an iterable of Section. This defines the Sections for the Stack and can be called repeatedly if needed. :param store: The optional Store where modifications will be recorded. If none is specified, no modifications can be done. :param mutable_section_id: The id of the MutableSection where changes are recorded. This requires the ``store`` parameter to be specified. """ self.sections_def = sections_def self.store = store self.mutable_section_id = mutable_section_id def iter_sections(self): """Iterate all the defined sections.""" # Ensuring lazy loading is achieved by delaying section matching (which # implies querying the persistent storage) until it can't be avoided # anymore by using callables to describe (possibly empty) section # lists. for sections in self.sections_def: for store, section in sections(): yield store, section def get(self, name, expand=True, convert=True): """Return the *first* option value found in the sections. This is where we guarantee that sections coming from Store are loaded lazily: the loading is delayed until we need to either check that an option exists or get its value, which in turn may require to discover in which sections it can be defined. Both of these (section and option existence) require loading the store (even partially). :param name: The queried option. :param expand: Whether options references should be expanded. :param convert: Whether the option value should be converted from unicode (do nothing for non-registered options). :returns: The value of the option. """ # FIXME: No caching of options nor sections yet -- vila 20110503 value = None found_store = None # Where the option value has been found # If the option is registered, it may provide additional info about # value handling try: opt = option_registry.get(name) except KeyError: # Not registered opt = None def expand_and_convert(val): # This may need to be called in different contexts if the value is # None or ends up being None during expansion or conversion. if val is not None: if expand: if isinstance(val, basestring): val = self._expand_options_in_string(val) else: trace.warning('Cannot expand "%s":' ' %s does not support option expansion' % (name, type(val))) if opt is None: val = found_store.unquote(val) elif convert: val = opt.convert_from_unicode(found_store, val) return val # First of all, check if the environment can override the configuration # value if opt is not None and opt.override_from_env: value = opt.get_override() value = expand_and_convert(value) if value is None: for store, section in self.iter_sections(): value = section.get(name) if value is not None: found_store = store break value = expand_and_convert(value) if opt is not None and value is None: # If the option is registered, it may provide a default value value = opt.get_default() value = expand_and_convert(value) for hook in ConfigHooks['get']: hook(self, name, value) return value def expand_options(self, string, env=None): """Expand option references in the string in the configuration context. :param string: The string containing option(s) to expand. :param env: An option dict defining additional configuration options or overriding existing ones. :returns: The expanded string. 
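        A sketch of expansion (option names and values are illustrative;
        ``stack`` is an already constructed Stack)::

            # With a store containing:
            #   foo=bar
            #   composite={foo}/baz
            stack.expand_options('x-{foo}')   # -> 'x-bar'
            stack.get('composite')            # -> 'bar/baz'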
""" return self._expand_options_in_string(string, env) def _expand_options_in_string(self, string, env=None, _refs=None): """Expand options in the string in the configuration context. :param string: The string to be expanded. :param env: An option dict defining additional configuration options or overriding existing ones. :param _refs: Private list (FIFO) containing the options being expanded to detect loops. :returns: The expanded string. """ if string is None: # Not much to expand there return None if _refs is None: # What references are currently resolved (to detect loops) _refs = [] result = string # We need to iterate until no more refs appear ({{foo}} will need two # iterations for example). expanded = True while expanded: expanded = False chunks = [] for is_ref, chunk in iter_option_refs(result): if not is_ref: chunks.append(chunk) else: expanded = True name = chunk[1:-1] if name in _refs: raise errors.OptionExpansionLoop(string, _refs) _refs.append(name) value = self._expand_option(name, env, _refs) if value is None: raise errors.ExpandingUnknownOption(name, string) chunks.append(value) _refs.pop() result = ''.join(chunks) return result def _expand_option(self, name, env, _refs): if env is not None and name in env: # Special case, values provided in env takes precedence over # anything else value = env[name] else: value = self.get(name, expand=False, convert=False) value = self._expand_options_in_string(value, env, _refs) return value def _get_mutable_section(self): """Get the MutableSection for the Stack. This is where we guarantee that the mutable section is lazily loaded: this means we won't load the corresponding store before setting a value or deleting an option. In practice the store will often be loaded but this helps catching some programming errors. """ store = self.store section = store.get_mutable_section(self.mutable_section_id) return store, section def set(self, name, value): """Set a new value for the option.""" store, section = self._get_mutable_section() section.set(name, store.quote(value)) for hook in ConfigHooks['set']: hook(self, name, value) def remove(self, name): """Remove an existing option.""" _, section = self._get_mutable_section() section.remove(name) for hook in ConfigHooks['remove']: hook(self, name) def __repr__(self): # Mostly for debugging use return "" % (self.__class__.__name__, id(self)) def _get_overrides(self): # FIXME: Hack around library_state.initialize never called if bzrlib.global_state is not None: return bzrlib.global_state.cmdline_overrides.get_sections() return [] def get_shared_store(self, store, state=None): """Get a known shared store. Store urls uniquely identify them and are used to ensure a single copy is shared across all users. :param store: The store known to the caller. :param state: The library state where the known stores are kept. :returns: The store received if it's not a known one, an already known otherwise. """ if state is None: state = bzrlib.global_state if state is None: global _shared_stores_at_exit_installed stores = _shared_stores def save_config_changes(): for k, store in stores.iteritems(): store.save_changes() if not _shared_stores_at_exit_installed: # FIXME: Ugly hack waiting for library_state to always be # available. 
-- vila 20120731 import atexit atexit.register(save_config_changes) _shared_stores_at_exit_installed = True else: stores = state.config_stores url = store.external_url() try: return stores[url] except KeyError: stores[url] = store return store class MemoryStack(Stack): """A configuration stack defined from a string. This is mainly intended for tests and requires no disk resources. """ def __init__(self, content=None): """Create an in-memory stack from a given content. It uses a single store based on configobj and support reading and writing options. :param content: The initial content of the store. If None, the store is not loaded and ``_load_from_string`` can and should be used if needed. """ store = IniFileStore() if content is not None: store._load_from_string(content) super(MemoryStack, self).__init__( [store.get_sections], store) class _CompatibleStack(Stack): """Place holder for compatibility with previous design. This is intended to ease the transition from the Config-based design to the Stack-based design and should not be used nor relied upon by plugins. One assumption made here is that the daughter classes will all use Stores derived from LockableIniFileStore). It implements set() and remove () by re-loading the store before applying the modification and saving it. The long term plan being to implement a single write by store to save all modifications, this class should not be used in the interim. """ def set(self, name, value): # Force a reload self.store.unload() super(_CompatibleStack, self).set(name, value) # Force a write to persistent storage self.store.save() def remove(self, name): # Force a reload self.store.unload() super(_CompatibleStack, self).remove(name) # Force a write to persistent storage self.store.save() class GlobalStack(Stack): """Global options only stack. The following sections are queried: * command-line overrides, * the 'DEFAULT' section in bazaar.conf This stack will use the ``DEFAULT`` section in bazaar.conf as its MutableSection. """ def __init__(self): gstore = self.get_shared_store(GlobalStore()) super(GlobalStack, self).__init__( [self._get_overrides, NameMatcher(gstore, 'DEFAULT').get_sections], gstore, mutable_section_id='DEFAULT') class LocationStack(Stack): """Per-location options falling back to global options stack. The following sections are queried: * command-line overrides, * the sections matching ``location`` in ``locations.conf``, the order being defined by the number of path components in the section glob, higher numbers first (from most specific section to most generic). * the 'DEFAULT' section in bazaar.conf This stack will use the ``location`` section in locations.conf as its MutableSection. """ def __init__(self, location): """Make a new stack for a location and global configuration. :param location: A URL prefix to """ lstore = self.get_shared_store(LocationStore()) if location.startswith('file://'): location = urlutils.local_path_from_url(location) gstore = self.get_shared_store(GlobalStore()) super(LocationStack, self).__init__( [self._get_overrides, LocationMatcher(lstore, location).get_sections, NameMatcher(gstore, 'DEFAULT').get_sections], lstore, mutable_section_id=location) class BranchStack(Stack): """Per-location options falling back to branch then global options stack. 
The following sections are queried: * command-line overrides, * the sections matching ``location`` in ``locations.conf``, the order being defined by the number of path components in the section glob, higher numbers first (from most specific section to most generic), * the no-name section in branch.conf, * the ``DEFAULT`` section in ``bazaar.conf``. This stack will use the no-name section in ``branch.conf`` as its MutableSection. """ def __init__(self, branch): lstore = self.get_shared_store(LocationStore()) bstore = branch._get_config_store() gstore = self.get_shared_store(GlobalStore()) super(BranchStack, self).__init__( [self._get_overrides, LocationMatcher(lstore, branch.base).get_sections, NameMatcher(bstore, None).get_sections, NameMatcher(gstore, 'DEFAULT').get_sections], bstore) self.branch = branch def lock_write(self, token=None): return self.branch.lock_write(token) def unlock(self): return self.branch.unlock() @needs_write_lock def set(self, name, value): super(BranchStack, self).set(name, value) # Unlocking the branch will trigger a store.save_changes() so the last # unlock saves all the changes. @needs_write_lock def remove(self, name): super(BranchStack, self).remove(name) # Unlocking the branch will trigger a store.save_changes() so the last # unlock saves all the changes. class RemoteControlStack(Stack): """Remote control-only options stack.""" # FIXME 2011-11-22 JRV This should probably be renamed to avoid confusion # with the stack used for remote bzr dirs. RemoteControlStack only uses # control.conf and is used only for stack options. def __init__(self, bzrdir): cstore = bzrdir._get_config_store() super(RemoteControlStack, self).__init__( [NameMatcher(cstore, None).get_sections], cstore) self.bzrdir = bzrdir class BranchOnlyStack(Stack): """Branch-only options stack.""" # FIXME: _BranchOnlyStack only uses branch.conf and is used only for the # stacked_on_location options waiting for http://pad.lv/832042 to be fixed. # -- vila 2011-12-16 def __init__(self, branch): bstore = branch._get_config_store() super(BranchOnlyStack, self).__init__( [NameMatcher(bstore, None).get_sections], bstore) self.branch = branch def lock_write(self, token=None): return self.branch.lock_write(token) def unlock(self): return self.branch.unlock() @needs_write_lock def set(self, name, value): super(BranchOnlyStack, self).set(name, value) # Force a write to persistent storage self.store.save_changes() @needs_write_lock def remove(self, name): super(BranchOnlyStack, self).remove(name) # Force a write to persistent storage self.store.save_changes() class cmd_config(commands.Command): __doc__ = """Display, set or remove a configuration option. Display the active value for option NAME. If --all is specified, NAME is interpreted as a regular expression and all matching options are displayed mentioning their scope and without resolving option references in the value). The active value that bzr will take into account is the first one displayed for each option. If NAME is not given, --all .* is implied (all options are displayed for the current scope). Setting a value is achieved by using NAME=value without spaces. The value is set in the most relevant scope and can be checked by displaying the option again. Removing a value is achieved by using --remove NAME. 
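# ---------------------------------------------------------------------------
# Illustrative sketch (not part of bzrlib): the first section that defines an
# option wins, which is what gives GlobalStack/LocationStack/BranchStack their
# precedence order described above. Stack, IniFileStore and NameMatcher are
# names from this module; the two stores and the option names are invented.
def _example_stack_precedence():
    from bzrlib.config import IniFileStore, NameMatcher, Stack
    branch_like = IniFileStore()
    branch_like._load_from_string('colour = blue\n')
    global_like = IniFileStore()
    global_like._load_from_string('colour = red\nfallback = yes\n')
    stack = Stack([NameMatcher(branch_like, None).get_sections,
                   NameMatcher(global_like, None).get_sections],
                  branch_like)
    # 'colour' is found in the first store, 'fallback' only in the second.
    return stack.get('colour'), stack.get('fallback')  # ('blue', 'yes')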
""" takes_args = ['name?'] takes_options = [ 'directory', # FIXME: This should be a registry option so that plugins can register # their own config files (or not) and will also address # http://pad.lv/788991 -- vila 20101115 commands.Option('scope', help='Reduce the scope to the specified' ' configuration file.', type=unicode), commands.Option('all', help='Display all the defined values for the matching options.', ), commands.Option('remove', help='Remove the option from' ' the configuration file.'), ] _see_also = ['configuration'] @commands.display_command def run(self, name=None, all=False, directory=None, scope=None, remove=False): if directory is None: directory = '.' directory = directory_service.directories.dereference(directory) directory = urlutils.normalize_url(directory) if remove and all: raise errors.BzrError( '--all and --remove are mutually exclusive.') elif remove: # Delete the option in the given scope self._remove_config_option(name, directory, scope) elif name is None: # Defaults to all options self._show_matching_options('.*', directory, scope) else: try: name, value = name.split('=', 1) except ValueError: # Display the option(s) value(s) if all: self._show_matching_options(name, directory, scope) else: self._show_value(name, directory, scope) else: if all: raise errors.BzrError( 'Only one option can be set.') # Set the option value self._set_config_option(name, value, directory, scope) def _get_stack(self, directory, scope=None, write_access=False): """Get the configuration stack specified by ``directory`` and ``scope``. :param directory: Where the configurations are derived from. :param scope: A specific config to start from. :param write_access: Whether a write access to the stack will be attempted. """ # FIXME: scope should allow access to plugin-specific stacks (even # reduced to the plugin-specific store), related to # http://pad.lv/788991 -- vila 2011-11-15 if scope is not None: if scope == 'bazaar': return GlobalStack() elif scope == 'locations': return LocationStack(directory) elif scope == 'branch': (_, br, _) = ( controldir.ControlDir.open_containing_tree_or_branch( directory)) if write_access: self.add_cleanup(br.lock_write().unlock) return br.get_config_stack() raise errors.NoSuchConfig(scope) else: try: (_, br, _) = ( controldir.ControlDir.open_containing_tree_or_branch( directory)) if write_access: self.add_cleanup(br.lock_write().unlock) return br.get_config_stack() except errors.NotBranchError: return LocationStack(directory) def _quote_multiline(self, value): if '\n' in value: value = '"""' + value + '"""' return value def _show_value(self, name, directory, scope): conf = self._get_stack(directory, scope) value = conf.get(name, expand=True, convert=False) if value is not None: # Quote the value appropriately value = self._quote_multiline(value) self.outf.write('%s\n' % (value,)) else: raise errors.NoSuchConfigOption(name) def _show_matching_options(self, name, directory, scope): name = lazy_regex.lazy_compile(name) # We want any error in the regexp to be raised *now* so we need to # avoid the delay introduced by the lazy regexp. But, we still do # want the nicer errors raised by lazy_regex. 
name._compile_and_collapse() cur_store_id = None cur_section = None conf = self._get_stack(directory, scope) for store, section in conf.iter_sections(): for oname in section.iter_option_names(): if name.search(oname): if cur_store_id != store.id: # Explain where the options are defined self.outf.write('%s:\n' % (store.id,)) cur_store_id = store.id cur_section = None if (section.id is not None and cur_section != section.id): # Display the section id as it appears in the store # (None doesn't appear by definition) self.outf.write(' [%s]\n' % (section.id,)) cur_section = section.id value = section.get(oname, expand=False) # Quote the value appropriately value = self._quote_multiline(value) self.outf.write(' %s = %s\n' % (oname, value)) def _set_config_option(self, name, value, directory, scope): conf = self._get_stack(directory, scope, write_access=True) conf.set(name, value) # Explicitly save the changes conf.store.save_changes() def _remove_config_option(self, name, directory, scope): if name is None: raise errors.BzrCommandError( '--remove expects an option to remove.') conf = self._get_stack(directory, scope, write_access=True) try: conf.remove(name) # Explicitly save the changes conf.store.save_changes() except KeyError: raise errors.NoSuchConfigOption(name) # Test registries # # We need adapters that can build a Store or a Stack in a test context. Test # classes, based on TestCaseWithTransport, can use the registry to parametrize # themselves. The builder will receive a test instance and should return a # ready-to-use store or stack. Plugins that define new store/stacks can also # register themselves here to be tested against the tests defined in # bzrlib.tests.test_config. Note that the builder can be called multiple times # for the same test. # The registered object should be a callable receiving a test instance # parameter (inheriting from tests.TestCaseWithTransport) and returning a Store # object. test_store_builder_registry = registry.Registry() # The registered object should be a callable receiving a test instance # parameter (inheriting from tests.TestCaseWithTransport) and returning a Stack # object. test_stack_builder_registry = registry.Registry() bzr-2.7.0/bzrlib/conflicts.py0000644000000000000000000007546411705034034014302 0ustar 00000000000000# Copyright (C) 2005, 2006, 2007, 2009, 2010, 2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
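# ---------------------------------------------------------------------------
# Illustrative sketch (not part of bzrlib): how a plugin might register a
# stack builder with the test registry described above. The registry name is
# from this module; the key and the builder itself are invented.
def _example_register_test_stack_builder():
    from bzrlib import config

    def build_memory_stack(test):
        # 'test' is the TestCaseWithTransport instance handed in by the test
        # framework; MemoryStack needs no disk resources, so it is unused.
        return config.MemoryStack('foo = bar\n')

    config.test_stack_builder_registry.register('memory-example',
                                                build_memory_stack)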
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # TODO: 'bzr resolve' should accept a directory name and work from that # point down from __future__ import absolute_import import os from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ import errno from bzrlib import ( cleanup, errors, osutils, rio, trace, transform, workingtree, ) from bzrlib.i18n import gettext, ngettext """) from bzrlib import ( commands, option, registry, ) CONFLICT_SUFFIXES = ('.THIS', '.BASE', '.OTHER') class cmd_conflicts(commands.Command): __doc__ = """List files with conflicts. Merge will do its best to combine the changes in two branches, but there are some kinds of problems only a human can fix. When it encounters those, it will mark a conflict. A conflict means that you need to fix something, before you can commit. Conflicts normally are listed as short, human-readable messages. If --text is supplied, the pathnames of files with text conflicts are listed, instead. (This is useful for editing all files with text conflicts.) Use bzr resolve when you have fixed a problem. """ takes_options = [ 'directory', option.Option('text', help='List paths of files with text conflicts.'), ] _see_also = ['resolve', 'conflict-types'] def run(self, text=False, directory=u'.'): wt = workingtree.WorkingTree.open_containing(directory)[0] for conflict in wt.conflicts(): if text: if conflict.typestring != 'text conflict': continue self.outf.write(conflict.path + '\n') else: self.outf.write(unicode(conflict) + '\n') resolve_action_registry = registry.Registry() resolve_action_registry.register( 'done', 'done', 'Marks the conflict as resolved.') resolve_action_registry.register( 'take-this', 'take_this', 'Resolve the conflict preserving the version in the working tree.') resolve_action_registry.register( 'take-other', 'take_other', 'Resolve the conflict taking the merged version into account.') resolve_action_registry.default_key = 'done' class ResolveActionOption(option.RegistryOption): def __init__(self): super(ResolveActionOption, self).__init__( 'action', 'How to resolve the conflict.', value_switches=True, registry=resolve_action_registry) class cmd_resolve(commands.Command): __doc__ = """Mark a conflict as resolved. Merge will do its best to combine the changes in two branches, but there are some kinds of problems only a human can fix. When it encounters those, it will mark a conflict. A conflict means that you need to fix something, before you can commit. Once you have fixed a problem, use "bzr resolve" to automatically mark text conflicts as fixed, "bzr resolve FILE" to mark a specific conflict as resolved, or "bzr resolve --all" to mark all conflicts as resolved. """ aliases = ['resolved'] takes_args = ['file*'] takes_options = [ 'directory', option.Option('all', help='Resolve all conflicts in this tree.'), ResolveActionOption(), ] _see_also = ['conflicts'] def run(self, file_list=None, all=False, action=None, directory=None): if all: if file_list: raise errors.BzrCommandError(gettext("If --all is specified," " no FILE may be provided")) if directory is None: directory = u'.' 
tree = workingtree.WorkingTree.open_containing(directory)[0] if action is None: action = 'done' else: tree, file_list = workingtree.WorkingTree.open_containing_paths( file_list, directory) if file_list is None: if action is None: # FIXME: There is a special case here related to the option # handling that could be clearer and easier to discover by # providing an --auto action (bug #344013 and #383396) and # make it mandatory instead of implicit and active only # when no file_list is provided -- vila 091229 action = 'auto' else: if action is None: action = 'done' if action == 'auto': if file_list is None: un_resolved, resolved = tree.auto_resolve() if len(un_resolved) > 0: trace.note(ngettext('%d conflict auto-resolved.', '%d conflicts auto-resolved.', len(resolved)), len(resolved)) trace.note(gettext('Remaining conflicts:')) for conflict in un_resolved: trace.note(unicode(conflict)) return 1 else: trace.note(gettext('All conflicts resolved.')) return 0 else: # FIXME: This can never occur but the block above needs some # refactoring to transfer tree.auto_resolve() to # conflict.auto(tree) --vila 091242 pass else: before, after = resolve(tree, file_list, action=action) trace.note(ngettext('{0} conflict resolved, {1} remaining', '{0} conflicts resolved, {1} remaining', before-after).format(before - after, after)) def resolve(tree, paths=None, ignore_misses=False, recursive=False, action='done'): """Resolve some or all of the conflicts in a working tree. :param paths: If None, resolve all conflicts. Otherwise, select only specified conflicts. :param recursive: If True, then elements of paths which are directories have all their children resolved, etc. When invoked as part of recursive commands like revert, this should be True. For commands or applications wishing finer-grained control, like the resolve command, this should be False. :param ignore_misses: If False, warnings will be printed if the supplied paths do not have conflicts. :param action: How the conflict should be resolved, """ tree.lock_tree_write() nb_conflicts_after = None try: tree_conflicts = tree.conflicts() nb_conflicts_before = len(tree_conflicts) if paths is None: new_conflicts = ConflictList() to_process = tree_conflicts else: new_conflicts, to_process = tree_conflicts.select_conflicts( tree, paths, ignore_misses, recursive) for conflict in to_process: try: conflict._do(action, tree) conflict.cleanup(tree) except NotImplementedError: new_conflicts.append(conflict) try: nb_conflicts_after = len(new_conflicts) tree.set_conflicts(new_conflicts) except errors.UnsupportedOperation: pass finally: tree.unlock() if nb_conflicts_after is None: nb_conflicts_after = nb_conflicts_before return nb_conflicts_before, nb_conflicts_after def restore(filename): """Restore a conflicted file to the state it was in before merging. Only text restoration is supported at present. """ conflicted = False try: osutils.rename(filename + ".THIS", filename) conflicted = True except OSError, e: if e.errno != errno.ENOENT: raise try: os.unlink(filename + ".BASE") conflicted = True except OSError, e: if e.errno != errno.ENOENT: raise try: os.unlink(filename + ".OTHER") conflicted = True except OSError, e: if e.errno != errno.ENOENT: raise if not conflicted: raise errors.NotConflicted(filename) class ConflictList(object): """List of conflicts. Typically obtained from WorkingTree.conflicts() Can be instantiated from stanzas or from Conflict subclasses. 
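# ---------------------------------------------------------------------------
# Illustrative sketch (not part of bzrlib): resolving every conflict in a
# working tree while keeping the local version, via the resolve() function
# defined above. The tree path is an invented example argument.
def _example_resolve_keeping_this(tree_path='.'):
    from bzrlib import workingtree
    from bzrlib.conflicts import resolve
    wt = workingtree.WorkingTree.open_containing(tree_path)[0]
    # paths=None selects every conflict; 'take_this' dispatches to the
    # action_take_this() methods of the Conflict subclasses further down.
    before, after = resolve(wt, paths=None, action='take_this')
    return before, after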
""" def __init__(self, conflicts=None): object.__init__(self) if conflicts is None: self.__list = [] else: self.__list = conflicts def is_empty(self): return len(self.__list) == 0 def __len__(self): return len(self.__list) def __iter__(self): return iter(self.__list) def __getitem__(self, key): return self.__list[key] def append(self, conflict): return self.__list.append(conflict) def __eq__(self, other_list): return list(self) == list(other_list) def __ne__(self, other_list): return not (self == other_list) def __repr__(self): return "ConflictList(%r)" % self.__list @staticmethod def from_stanzas(stanzas): """Produce a new ConflictList from an iterable of stanzas""" conflicts = ConflictList() for stanza in stanzas: conflicts.append(Conflict.factory(**stanza.as_dict())) return conflicts def to_stanzas(self): """Generator of stanzas""" for conflict in self: yield conflict.as_stanza() def to_strings(self): """Generate strings for the provided conflicts""" for conflict in self: yield unicode(conflict) def remove_files(self, tree): """Remove the THIS, BASE and OTHER files for listed conflicts""" for conflict in self: if not conflict.has_files: continue conflict.cleanup(tree) def select_conflicts(self, tree, paths, ignore_misses=False, recurse=False): """Select the conflicts associated with paths in a tree. File-ids are also used for this. :return: a pair of ConflictLists: (not_selected, selected) """ path_set = set(paths) ids = {} selected_paths = set() new_conflicts = ConflictList() selected_conflicts = ConflictList() for path in paths: file_id = tree.path2id(path) if file_id is not None: ids[file_id] = path for conflict in self: selected = False for key in ('path', 'conflict_path'): cpath = getattr(conflict, key, None) if cpath is None: continue if cpath in path_set: selected = True selected_paths.add(cpath) if recurse: if osutils.is_inside_any(path_set, cpath): selected = True selected_paths.add(cpath) for key in ('file_id', 'conflict_file_id'): cfile_id = getattr(conflict, key, None) if cfile_id is None: continue try: cpath = ids[cfile_id] except KeyError: continue selected = True selected_paths.add(cpath) if selected: selected_conflicts.append(conflict) else: new_conflicts.append(conflict) if ignore_misses is not True: for path in [p for p in paths if p not in selected_paths]: if not os.path.exists(tree.abspath(path)): print "%s does not exist" % path else: print "%s is not conflicted" % path return new_conflicts, selected_conflicts class Conflict(object): """Base class for all types of conflict""" # FIXME: cleanup should take care of that ? -- vila 091229 has_files = False def __init__(self, path, file_id=None): self.path = path # warn turned off, because the factory blindly transfers the Stanza # values to __init__ and Stanza is purely a Unicode api. 
self.file_id = osutils.safe_file_id(file_id, warn=False) def as_stanza(self): s = rio.Stanza(type=self.typestring, path=self.path) if self.file_id is not None: # Stanza requires Unicode apis s.add('file_id', self.file_id.decode('utf8')) return s def _cmp_list(self): return [type(self), self.path, self.file_id] def __cmp__(self, other): if getattr(other, "_cmp_list", None) is None: return -1 return cmp(self._cmp_list(), other._cmp_list()) def __hash__(self): return hash((type(self), self.path, self.file_id)) def __eq__(self, other): return self.__cmp__(other) == 0 def __ne__(self, other): return not self.__eq__(other) def __unicode__(self): return self.format % self.__dict__ def __repr__(self): rdict = dict(self.__dict__) rdict['class'] = self.__class__.__name__ return self.rformat % rdict @staticmethod def factory(type, **kwargs): global ctype return ctype[type](**kwargs) @staticmethod def sort_key(conflict): if conflict.path is not None: return conflict.path, conflict.typestring elif getattr(conflict, "conflict_path", None) is not None: return conflict.conflict_path, conflict.typestring else: return None, conflict.typestring def _do(self, action, tree): """Apply the specified action to the conflict. :param action: The method name to call. :param tree: The tree passed as a parameter to the method. """ meth = getattr(self, 'action_%s' % action, None) if meth is None: raise NotImplementedError(self.__class__.__name__ + '.' + action) meth(tree) def associated_filenames(self): """The names of the files generated to help resolve the conflict.""" raise NotImplementedError(self.associated_filenames) def cleanup(self, tree): for fname in self.associated_filenames(): try: osutils.delete_any(tree.abspath(fname)) except OSError, e: if e.errno != errno.ENOENT: raise def action_done(self, tree): """Mark the conflict as solved once it has been handled.""" # This method does nothing but simplifies the design of upper levels. pass def action_take_this(self, tree): raise NotImplementedError(self.action_take_this) def action_take_other(self, tree): raise NotImplementedError(self.action_take_other) def _resolve_with_cleanups(self, tree, *args, **kwargs): tt = transform.TreeTransform(tree) op = cleanup.OperationWithCleanups(self._resolve) op.add_cleanup(tt.finalize) op.run_simple(tt, *args, **kwargs) class PathConflict(Conflict): """A conflict was encountered merging file paths""" typestring = 'path conflict' format = 'Path conflict: %(path)s / %(conflict_path)s' rformat = '%(class)s(%(path)r, %(conflict_path)r, %(file_id)r)' def __init__(self, path, conflict_path=None, file_id=None): Conflict.__init__(self, path, file_id) self.conflict_path = conflict_path def as_stanza(self): s = Conflict.as_stanza(self) if self.conflict_path is not None: s.add('conflict_path', self.conflict_path) return s def associated_filenames(self): # No additional files have been generated here return [] def _resolve(self, tt, file_id, path, winner): """Resolve the conflict. :param tt: The TreeTransform where the conflict is resolved. :param file_id: The retained file id. :param path: The retained path. :param winner: 'this' or 'other' indicates which side is the winner. 
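# ---------------------------------------------------------------------------
# Illustrative sketch (not part of bzrlib): serializing a conflict to a rio
# stanza and rebuilding it with Conflict.factory(), which dispatches on the
# 'type' field through the ctype registry filled by register_types() at the
# bottom of this module. The path and file id are invented.
def _example_conflict_stanza_roundtrip():
    from bzrlib.conflicts import Conflict, TextConflict
    original = TextConflict('README', file_id='readme-id')
    stanza = original.as_stanza()
    rebuilt = Conflict.factory(**stanza.as_dict())
    return original == rebuilt  # True: same type, path and file_id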
""" path_to_create = None if winner == 'this': if self.path == '': return # Nothing to do if self.conflict_path == '': path_to_create = self.path revid = tt._tree.get_parent_ids()[0] elif winner == 'other': if self.conflict_path == '': return # Nothing to do if self.path == '': path_to_create = self.conflict_path # FIXME: If there are more than two parents we may need to # iterate. Taking the last parent is the safer bet in the mean # time. -- vila 20100309 revid = tt._tree.get_parent_ids()[-1] else: # Programmer error raise AssertionError('bad winner: %r' % (winner,)) if path_to_create is not None: tid = tt.trans_id_tree_path(path_to_create) transform.create_from_tree( tt, tid, self._revision_tree(tt._tree, revid), file_id) tt.version_file(file_id, tid) else: tid = tt.trans_id_file_id(file_id) # Adjust the path for the retained file id parent_tid = tt.get_tree_parent(tid) tt.adjust_path(osutils.basename(path), parent_tid, tid) tt.apply() def _revision_tree(self, tree, revid): return tree.branch.repository.revision_tree(revid) def _infer_file_id(self, tree): # Prior to bug #531967, file_id wasn't always set, there may still be # conflict files in the wild so we need to cope with them # Establish which path we should use to find back the file-id possible_paths = [] for p in (self.path, self.conflict_path): if p == '': # special hard-coded path continue if p is not None: possible_paths.append(p) # Search the file-id in the parents with any path available file_id = None for revid in tree.get_parent_ids(): revtree = self._revision_tree(tree, revid) for p in possible_paths: file_id = revtree.path2id(p) if file_id is not None: return revtree, file_id return None, None def action_take_this(self, tree): if self.file_id is not None: self._resolve_with_cleanups(tree, self.file_id, self.path, winner='this') else: # Prior to bug #531967 we need to find back the file_id and restore # the content from there revtree, file_id = self._infer_file_id(tree) tree.revert([revtree.id2path(file_id)], old_tree=revtree, backups=False) def action_take_other(self, tree): if self.file_id is not None: self._resolve_with_cleanups(tree, self.file_id, self.conflict_path, winner='other') else: # Prior to bug #531967 we need to find back the file_id and restore # the content from there revtree, file_id = self._infer_file_id(tree) tree.revert([revtree.id2path(file_id)], old_tree=revtree, backups=False) class ContentsConflict(PathConflict): """The files are of different types (or both binary), or not present""" has_files = True typestring = 'contents conflict' format = 'Contents conflict in %(path)s' def associated_filenames(self): return [self.path + suffix for suffix in ('.BASE', '.OTHER')] def _resolve(self, tt, suffix_to_remove): """Resolve the conflict. :param tt: The TreeTransform where the conflict is resolved. :param suffix_to_remove: Either 'THIS' or 'OTHER' The resolution is symmetric: when taking THIS, OTHER is deleted and item.THIS is renamed into item and vice-versa. """ try: # Delete 'item.THIS' or 'item.OTHER' depending on # suffix_to_remove tt.delete_contents( tt.trans_id_tree_path(self.path + '.' + suffix_to_remove)) except errors.NoSuchFile: # There are valid cases where 'item.suffix_to_remove' either # never existed or was already deleted (including the case # where the user deleted it) pass try: this_path = tt._tree.id2path(self.file_id) except errors.NoSuchId: # The file is not present anymore. This may happen if the user # deleted the file either manually or when resolving a conflict on # the parent. 
We may raise some exception to indicate that the # conflict doesn't exist anymore and as such doesn't need to be # resolved ? -- vila 20110615 this_tid = None else: this_tid = tt.trans_id_tree_path(this_path) if this_tid is not None: # Rename 'item.suffix_to_remove' (note that if # 'item.suffix_to_remove' has been deleted, this is a no-op) parent_tid = tt.get_tree_parent(this_tid) tt.adjust_path(osutils.basename(self.path), parent_tid, this_tid) tt.apply() def action_take_this(self, tree): self._resolve_with_cleanups(tree, 'OTHER') def action_take_other(self, tree): self._resolve_with_cleanups(tree, 'THIS') # TODO: There should be a base revid attribute to better inform the user about # how the conflicts were generated. class TextConflict(Conflict): """The merge algorithm could not resolve all differences encountered.""" has_files = True typestring = 'text conflict' format = 'Text conflict in %(path)s' rformat = '%(class)s(%(path)r, %(file_id)r)' def associated_filenames(self): return [self.path + suffix for suffix in CONFLICT_SUFFIXES] def _resolve(self, tt, winner_suffix): """Resolve the conflict by copying one of .THIS or .OTHER into file. :param tt: The TreeTransform where the conflict is resolved. :param winner_suffix: Either 'THIS' or 'OTHER' The resolution is symmetric, when taking THIS, item.THIS is renamed into item and vice-versa. This takes one of the files as a whole ignoring every difference that could have been merged cleanly. """ # To avoid useless copies, we switch item and item.winner_suffix, only # item will exist after the conflict has been resolved anyway. item_tid = tt.trans_id_file_id(self.file_id) item_parent_tid = tt.get_tree_parent(item_tid) winner_path = self.path + '.' + winner_suffix winner_tid = tt.trans_id_tree_path(winner_path) winner_parent_tid = tt.get_tree_parent(winner_tid) # Switch the paths to preserve the content tt.adjust_path(osutils.basename(self.path), winner_parent_tid, winner_tid) tt.adjust_path(osutils.basename(winner_path), item_parent_tid, item_tid) # Associate the file_id to the right content tt.unversion_file(item_tid) tt.version_file(self.file_id, winner_tid) tt.apply() def action_take_this(self, tree): self._resolve_with_cleanups(tree, 'THIS') def action_take_other(self, tree): self._resolve_with_cleanups(tree, 'OTHER') class HandledConflict(Conflict): """A path problem that has been provisionally resolved. This is intended to be a base class. """ rformat = "%(class)s(%(action)r, %(path)r, %(file_id)r)" def __init__(self, action, path, file_id=None): Conflict.__init__(self, path, file_id) self.action = action def _cmp_list(self): return Conflict._cmp_list(self) + [self.action] def as_stanza(self): s = Conflict.as_stanza(self) s.add('action', self.action) return s def associated_filenames(self): # Nothing has been generated here return [] class HandledPathConflict(HandledConflict): """A provisionally-resolved path problem involving two paths. This is intended to be a base class. """ rformat = "%(class)s(%(action)r, %(path)r, %(conflict_path)r,"\ " %(file_id)r, %(conflict_file_id)r)" def __init__(self, action, path, conflict_path, file_id=None, conflict_file_id=None): HandledConflict.__init__(self, action, path, file_id) self.conflict_path = conflict_path # warn turned off, because the factory blindly transfers the Stanza # values to __init__. 
self.conflict_file_id = osutils.safe_file_id(conflict_file_id, warn=False) def _cmp_list(self): return HandledConflict._cmp_list(self) + [self.conflict_path, self.conflict_file_id] def as_stanza(self): s = HandledConflict.as_stanza(self) s.add('conflict_path', self.conflict_path) if self.conflict_file_id is not None: s.add('conflict_file_id', self.conflict_file_id.decode('utf8')) return s class DuplicateID(HandledPathConflict): """Two files want the same file_id.""" typestring = 'duplicate id' format = 'Conflict adding id to %(conflict_path)s. %(action)s %(path)s.' class DuplicateEntry(HandledPathConflict): """Two directory entries want to have the same name.""" typestring = 'duplicate' format = 'Conflict adding file %(conflict_path)s. %(action)s %(path)s.' def action_take_this(self, tree): tree.remove([self.conflict_path], force=True, keep_files=False) tree.rename_one(self.path, self.conflict_path) def action_take_other(self, tree): tree.remove([self.path], force=True, keep_files=False) class ParentLoop(HandledPathConflict): """An attempt to create an infinitely-looping directory structure. This is rare, but can be produced like so: tree A: mv foo bar tree B: mv bar foo merge A and B """ typestring = 'parent loop' format = 'Conflict moving %(path)s into %(conflict_path)s. %(action)s.' def action_take_this(self, tree): # just acccept bzr proposal pass def action_take_other(self, tree): tt = transform.TreeTransform(tree) try: p_tid = tt.trans_id_file_id(self.file_id) parent_tid = tt.get_tree_parent(p_tid) cp_tid = tt.trans_id_file_id(self.conflict_file_id) cparent_tid = tt.get_tree_parent(cp_tid) tt.adjust_path(osutils.basename(self.path), cparent_tid, cp_tid) tt.adjust_path(osutils.basename(self.conflict_path), parent_tid, p_tid) tt.apply() finally: tt.finalize() class UnversionedParent(HandledConflict): """An attempt to version a file whose parent directory is not versioned. Typically, the result of a merge where one tree unversioned the directory and the other added a versioned file to it. """ typestring = 'unversioned parent' format = 'Conflict because %(path)s is not versioned, but has versioned'\ ' children. %(action)s.' # FIXME: We silently do nothing to make tests pass, but most probably the # conflict shouldn't exist (the long story is that the conflict is # generated with another one that can be resolved properly) -- vila 091224 def action_take_this(self, tree): pass def action_take_other(self, tree): pass class MissingParent(HandledConflict): """An attempt to add files to a directory that is not present. Typically, the result of a merge where THIS deleted the directory and the OTHER added a file to it. See also: DeletingParent (same situation, THIS and OTHER reversed) """ typestring = 'missing parent' format = 'Conflict adding files to %(path)s. %(action)s.' def action_take_this(self, tree): tree.remove([self.path], force=True, keep_files=False) def action_take_other(self, tree): # just acccept bzr proposal pass class DeletingParent(HandledConflict): """An attempt to add files to a directory that is not present. Typically, the result of a merge where one OTHER deleted the directory and the THIS added a file to it. """ typestring = 'deleting parent' format = "Conflict: can't delete %(path)s because it is not empty. "\ "%(action)s." # FIXME: It's a bit strange that the default action is not coherent with # MissingParent from the *user* pov. 
def action_take_this(self, tree): # just acccept bzr proposal pass def action_take_other(self, tree): tree.remove([self.path], force=True, keep_files=False) class NonDirectoryParent(HandledConflict): """An attempt to add files to a directory that is not a directory or an attempt to change the kind of a directory with files. """ typestring = 'non-directory parent' format = "Conflict: %(path)s is not a directory, but has files in it."\ " %(action)s." # FIXME: .OTHER should be used instead of .new when the conflict is created def action_take_this(self, tree): # FIXME: we should preserve that path when the conflict is generated ! if self.path.endswith('.new'): conflict_path = self.path[:-(len('.new'))] tree.remove([self.path], force=True, keep_files=False) tree.add(conflict_path) else: raise NotImplementedError(self.action_take_this) def action_take_other(self, tree): # FIXME: we should preserve that path when the conflict is generated ! if self.path.endswith('.new'): conflict_path = self.path[:-(len('.new'))] tree.remove([conflict_path], force=True, keep_files=False) tree.rename_one(self.path, conflict_path) else: raise NotImplementedError(self.action_take_other) ctype = {} def register_types(*conflict_types): """Register a Conflict subclass for serialization purposes""" global ctype for conflict_type in conflict_types: ctype[conflict_type.typestring] = conflict_type register_types(ContentsConflict, TextConflict, PathConflict, DuplicateID, DuplicateEntry, ParentLoop, UnversionedParent, MissingParent, DeletingParent, NonDirectoryParent) bzr-2.7.0/bzrlib/controldir.py0000644000000000000000000016037011716514135014473 0ustar 00000000000000# Copyright (C) 2010, 2011, 2012 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """ControlDir is the basic control directory class. The ControlDir class is the base for the control directory used by all bzr and foreign formats. For the ".bzr" implementation, see bzrlib.bzrdir.BzrDir. """ from __future__ import absolute_import from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ import textwrap from bzrlib import ( errors, hooks, revision as _mod_revision, transport as _mod_transport, trace, ui, urlutils, ) from bzrlib.transport import local from bzrlib.push import ( PushResult, ) from bzrlib.i18n import gettext """) from bzrlib import registry class ControlComponent(object): """Abstract base class for control directory components. This provides interfaces that are common across controldirs, repositories, branches, and workingtree control directories. They all expose two urls and transports: the *user* URL is the one that stops above the control directory (eg .bzr) and that should normally be used in messages, and the *control* URL is under that in eg .bzr/checkout and is used to read the control files. This can be used as a mixin and is intended to fit with foreign formats. 
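# ---------------------------------------------------------------------------
# Illustrative sketch (not part of bzrlib): the two URLs exposed by a control
# component, as described above -- the user URL stops above the control
# directory, the control URL points inside it. The location argument is an
# invented example.
def _example_component_urls(location='.'):
    from bzrlib import controldir
    cd, _relpath = controldir.ControlDir.open_containing(location)
    # For a local branch these look like file:///path/to/branch/ and
    # file:///path/to/branch/.bzr/ respectively.
    return cd.user_url, cd.control_url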
""" @property def control_transport(self): raise NotImplementedError @property def control_url(self): return self.control_transport.base @property def user_transport(self): raise NotImplementedError @property def user_url(self): return self.user_transport.base class ControlDir(ControlComponent): """A control directory. While this represents a generic control directory, there are a few features that are present in this interface that are currently only supported by one of its implementations, BzrDir. These features (bound branches, stacked branches) are currently only supported by Bazaar, but could be supported by other version control systems as well. Implementations are required to raise the appropriate exceptions when an operation is requested that is not supported. This also makes life easier for API users who can rely on the implementation always allowing a particular feature to be requested but raising an exception when it is not supported, rather than requiring the API users to check for magic attributes to see what features are supported. """ def can_convert_format(self): """Return true if this controldir is one whose format we can convert from.""" return True def list_branches(self): """Return a sequence of all branches local to this control directory. """ return self.get_branches().values() def get_branches(self): """Get all branches in this control directory, as a dictionary. :return: Dictionary mapping branch names to instances. """ try: return { "": self.open_branch() } except (errors.NotBranchError, errors.NoRepositoryPresent): return {} def is_control_filename(self, filename): """True if filename is the name of a path which is reserved for controldirs. :param filename: A filename within the root transport of this controldir. This is true IF and ONLY IF the filename is part of the namespace reserved for bzr control dirs. Currently this is the '.bzr' directory in the root of the root_transport. it is expected that plugins will need to extend this in the future - for instance to make bzr talk with svn working trees. """ raise NotImplementedError(self.is_control_filename) def needs_format_conversion(self, format=None): """Return true if this controldir needs convert_format run on it. For instance, if the repository format is out of date but the branch and working tree are not, this should return True. :param format: Optional parameter indicating a specific desired format we plan to arrive at. """ raise NotImplementedError(self.needs_format_conversion) def create_repository(self, shared=False): """Create a new repository in this control directory. :param shared: If a shared repository should be created :return: The newly created repository """ raise NotImplementedError(self.create_repository) def destroy_repository(self): """Destroy the repository in this ControlDir.""" raise NotImplementedError(self.destroy_repository) def create_branch(self, name=None, repository=None, append_revisions_only=None): """Create a branch in this ControlDir. :param name: Name of the colocated branch to create, None for the user selected branch or "" for the active branch. :param append_revisions_only: Whether this branch should only allow appending new revisions to its history. The controldirs format will control what branch format is created. For more control see BranchFormatXX.create(a_controldir). """ raise NotImplementedError(self.create_branch) def destroy_branch(self, name=None): """Destroy a branch in this ControlDir. 
:param name: Name of the branch to destroy, None for the user selected branch or "" for the active branch. :raise NotBranchError: When the branch does not exist """ raise NotImplementedError(self.destroy_branch) def create_workingtree(self, revision_id=None, from_branch=None, accelerator_tree=None, hardlink=False): """Create a working tree at this ControlDir. :param revision_id: create it as of this revision id. :param from_branch: override controldir branch (for lightweight checkouts) :param accelerator_tree: A tree which can be used for retrieving file contents more quickly than the revision tree, i.e. a workingtree. The revision tree will be used for cases where accelerator_tree's content is different. """ raise NotImplementedError(self.create_workingtree) def destroy_workingtree(self): """Destroy the working tree at this ControlDir. Formats that do not support this may raise UnsupportedOperation. """ raise NotImplementedError(self.destroy_workingtree) def destroy_workingtree_metadata(self): """Destroy the control files for the working tree at this ControlDir. The contents of working tree files are not affected. Formats that do not support this may raise UnsupportedOperation. """ raise NotImplementedError(self.destroy_workingtree_metadata) def find_branch_format(self, name=None): """Find the branch 'format' for this controldir. This might be a synthetic object for e.g. RemoteBranch and SVN. """ raise NotImplementedError(self.find_branch_format) def get_branch_reference(self, name=None): """Return the referenced URL for the branch in this controldir. :param name: Optional colocated branch name :raises NotBranchError: If there is no Branch. :raises NoColocatedBranchSupport: If a branch name was specified but colocated branches are not supported. :return: The URL the branch in this controldir references if it is a reference branch, or None for regular branches. """ if name is not None: raise errors.NoColocatedBranchSupport(self) return None def set_branch_reference(self, target_branch, name=None): """Set the referenced URL for the branch in this controldir. :param name: Optional colocated branch name :param target_branch: Branch to reference :raises NoColocatedBranchSupport: If a branch name was specified but colocated branches are not supported. :return: The referencing branch """ raise NotImplementedError(self.set_branch_reference) def open_branch(self, name=None, unsupported=False, ignore_fallbacks=False, possible_transports=None): """Open the branch object at this ControlDir if one is present. :param unsupported: if True, then no longer supported branch formats can still be opened. :param ignore_fallbacks: Whether to open fallback repositories :param possible_transports: Transports to use for opening e.g. fallback repositories. """ raise NotImplementedError(self.open_branch) def open_repository(self, _unsupported=False): """Open the repository object at this ControlDir if one is present. This will not follow the Branch object pointer - it's strictly a direct open facility. Most client code should use open_branch().repository to get at a repository. :param _unsupported: a private parameter, not part of the api. """ raise NotImplementedError(self.open_repository) def find_repository(self): """Find the repository that should be used. This does not require a branch as we use it to find the repo for new branches as well as to hook existing branches up to their repository. 
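# ---------------------------------------------------------------------------
# Illustrative sketch (not part of bzrlib): probing a control dir for its
# branch and repository with open_branch() and find_repository(), described
# above. The url argument is an invented example.
def _example_open_branch_and_repo(url):
    from bzrlib import controldir, errors
    cd = controldir.ControlDir.open(url)
    try:
        branch = cd.open_branch()
    except errors.NotBranchError:
        branch = None
    try:
        repo = cd.find_repository()
    except errors.NoRepositoryPresent:
        repo = None
    return branch, repo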
""" raise NotImplementedError(self.find_repository) def open_workingtree(self, unsupported=False, recommend_upgrade=True, from_branch=None): """Open the workingtree object at this ControlDir if one is present. :param recommend_upgrade: Optional keyword parameter, when True (the default), emit through the ui module a recommendation that the user upgrade the working tree when the workingtree being opened is old (but still fully supported). :param from_branch: override controldir branch (for lightweight checkouts) """ raise NotImplementedError(self.open_workingtree) def has_branch(self, name=None): """Tell if this controldir contains a branch. Note: if you're going to open the branch, you should just go ahead and try, and not ask permission first. (This method just opens the branch and discards it, and that's somewhat expensive.) """ try: self.open_branch(name, ignore_fallbacks=True) return True except errors.NotBranchError: return False def _get_selected_branch(self): """Return the name of the branch selected by the user. :return: Name of the branch selected by the user, or "". """ branch = self.root_transport.get_segment_parameters().get("branch") if branch is None: branch = "" return urlutils.unescape(branch) def has_workingtree(self): """Tell if this controldir contains a working tree. This will still raise an exception if the controldir has a workingtree that is remote & inaccessible. Note: if you're going to open the working tree, you should just go ahead and try, and not ask permission first. (This method just opens the workingtree and discards it, and that's somewhat expensive.) """ try: self.open_workingtree(recommend_upgrade=False) return True except errors.NoWorkingTree: return False def cloning_metadir(self, require_stacking=False): """Produce a metadir suitable for cloning or sprouting with. These operations may produce workingtrees (yes, even though they're "cloning" something that doesn't have a tree), so a viable workingtree format must be selected. :require_stacking: If True, non-stackable formats will be upgraded to similar stackable formats. :returns: a ControlDirFormat with all component formats either set appropriately or set to None if that component should not be created. """ raise NotImplementedError(self.cloning_metadir) def checkout_metadir(self): """Produce a metadir suitable for checkouts of this controldir. :returns: A ControlDirFormat with all component formats either set appropriately or set to None if that component should not be created. """ return self.cloning_metadir() def sprout(self, url, revision_id=None, force_new_repo=False, recurse='down', possible_transports=None, accelerator_tree=None, hardlink=False, stacked=False, source_branch=None, create_tree_if_local=True): """Create a copy of this controldir prepared for use as a new line of development. If url's last component does not exist, it will be created. Attributes related to the identity of the source branch like branch nickname will be cleaned, a working tree is created whether one existed before or not; and a local branch is always created. :param revision_id: if revision_id is not None, then the clone operation may tune itself to download less data. :param accelerator_tree: A tree which can be used for retrieving file contents more quickly than the revision tree, i.e. a workingtree. The revision tree will be used for cases where accelerator_tree's content is different. :param hardlink: If true, hard-link files from accelerator_tree, where possible. 
:param stacked: If true, create a stacked branch referring to the location of this control directory. :param create_tree_if_local: If true, a working-tree will be created when working locally. """ raise NotImplementedError(self.sprout) def push_branch(self, source, revision_id=None, overwrite=False, remember=False, create_prefix=False): """Push the source branch into this ControlDir.""" br_to = None # If we can open a branch, use its direct repository, otherwise see # if there is a repository without a branch. try: br_to = self.open_branch() except errors.NotBranchError: # Didn't find a branch, can we find a repository? repository_to = self.find_repository() else: # Found a branch, so we must have found a repository repository_to = br_to.repository push_result = PushResult() push_result.source_branch = source if br_to is None: # We have a repository but no branch, copy the revisions, and then # create a branch. if revision_id is None: # No revision supplied by the user, default to the branch # revision revision_id = source.last_revision() repository_to.fetch(source.repository, revision_id=revision_id) br_to = source.clone(self, revision_id=revision_id) if source.get_push_location() is None or remember: # FIXME: Should be done only if we succeed ? -- vila 2012-01-18 source.set_push_location(br_to.base) push_result.stacked_on = None push_result.branch_push_result = None push_result.old_revno = None push_result.old_revid = _mod_revision.NULL_REVISION push_result.target_branch = br_to push_result.master_branch = None push_result.workingtree_updated = False else: # We have successfully opened the branch, remember if necessary: if source.get_push_location() is None or remember: # FIXME: Should be done only if we succeed ? -- vila 2012-01-18 source.set_push_location(br_to.base) try: tree_to = self.open_workingtree() except errors.NotLocalUrl: push_result.branch_push_result = source.push(br_to, overwrite, stop_revision=revision_id) push_result.workingtree_updated = False except errors.NoWorkingTree: push_result.branch_push_result = source.push(br_to, overwrite, stop_revision=revision_id) push_result.workingtree_updated = None # Not applicable else: tree_to.lock_write() try: push_result.branch_push_result = source.push( tree_to.branch, overwrite, stop_revision=revision_id) tree_to.update() finally: tree_to.unlock() push_result.workingtree_updated = True push_result.old_revno = push_result.branch_push_result.old_revno push_result.old_revid = push_result.branch_push_result.old_revid push_result.target_branch = \ push_result.branch_push_result.target_branch return push_result def _get_tree_branch(self, name=None): """Return the branch and tree, if any, for this controldir. :param name: Name of colocated branch to open. Return None for tree if not present or inaccessible. Raise NotBranchError if no branch is present. 
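# ---------------------------------------------------------------------------
# Illustrative sketch (not part of bzrlib): using sprout() to start a new
# line of development, as documented above. Both URLs are invented example
# arguments.
def _example_sprout(source_url, target_url):
    from bzrlib import controldir
    source = controldir.ControlDir.open(source_url)
    # sprout() creates target_url if needed, copies the branch and, when the
    # target is local, a working tree (see create_tree_if_local).
    new_cd = source.sprout(target_url)
    return new_cd.open_branch()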
:return: (tree, branch) """ try: tree = self.open_workingtree() except (errors.NoWorkingTree, errors.NotLocalUrl): tree = None branch = self.open_branch(name=name) else: if name is not None: branch = self.open_branch(name=name) else: branch = tree.branch return tree, branch def get_config(self): """Get configuration for this ControlDir.""" raise NotImplementedError(self.get_config) def check_conversion_target(self, target_format): """Check that a controldir as a whole can be converted to a new format.""" raise NotImplementedError(self.check_conversion_target) def clone(self, url, revision_id=None, force_new_repo=False, preserve_stacking=False): """Clone this controldir and its contents to url verbatim. :param url: The url create the clone at. If url's last component does not exist, it will be created. :param revision_id: The tip revision-id to use for any branch or working tree. If not None, then the clone operation may tune itself to download less data. :param force_new_repo: Do not use a shared repository for the target even if one is available. :param preserve_stacking: When cloning a stacked branch, stack the new branch on top of the other branch's stacked-on branch. """ return self.clone_on_transport(_mod_transport.get_transport(url), revision_id=revision_id, force_new_repo=force_new_repo, preserve_stacking=preserve_stacking) def clone_on_transport(self, transport, revision_id=None, force_new_repo=False, preserve_stacking=False, stacked_on=None, create_prefix=False, use_existing_dir=True, no_tree=False): """Clone this controldir and its contents to transport verbatim. :param transport: The transport for the location to produce the clone at. If the target directory does not exist, it will be created. :param revision_id: The tip revision-id to use for any branch or working tree. If not None, then the clone operation may tune itself to download less data. :param force_new_repo: Do not use a shared repository for the target, even if one is available. :param preserve_stacking: When cloning a stacked branch, stack the new branch on top of the other branch's stacked-on branch. :param create_prefix: Create any missing directories leading up to to_transport. :param use_existing_dir: Use an existing directory if one exists. :param no_tree: If set to true prevents creation of a working tree. """ raise NotImplementedError(self.clone_on_transport) @classmethod def find_bzrdirs(klass, transport, evaluate=None, list_current=None): """Find control dirs recursively from current location. This is intended primarily as a building block for more sophisticated functionality, like finding trees under a directory, or finding branches that use a given repository. :param evaluate: An optional callable that yields recurse, value, where recurse controls whether this controldir is recursed into and value is the value to yield. By default, all bzrdirs are recursed into, and the return value is the controldir. :param list_current: if supplied, use this function to list the current directory, instead of Transport.list_dir :return: a generator of found bzrdirs, or whatever evaluate returns. 
""" if list_current is None: def list_current(transport): return transport.list_dir('') if evaluate is None: def evaluate(controldir): return True, controldir pending = [transport] while len(pending) > 0: current_transport = pending.pop() recurse = True try: controldir = klass.open_from_transport(current_transport) except (errors.NotBranchError, errors.PermissionDenied): pass else: recurse, value = evaluate(controldir) yield value try: subdirs = list_current(current_transport) except (errors.NoSuchFile, errors.PermissionDenied): continue if recurse: for subdir in sorted(subdirs, reverse=True): pending.append(current_transport.clone(subdir)) @classmethod def find_branches(klass, transport): """Find all branches under a transport. This will find all branches below the transport, including branches inside other branches. Where possible, it will use Repository.find_branches. To list all the branches that use a particular Repository, see Repository.find_branches """ def evaluate(controldir): try: repository = controldir.open_repository() except errors.NoRepositoryPresent: pass else: return False, ([], repository) return True, (controldir.list_branches(), None) ret = [] for branches, repo in klass.find_bzrdirs( transport, evaluate=evaluate): if repo is not None: ret.extend(repo.find_branches()) if branches is not None: ret.extend(branches) return ret @classmethod def create_branch_and_repo(klass, base, force_new_repo=False, format=None): """Create a new ControlDir, Branch and Repository at the url 'base'. This will use the current default ControlDirFormat unless one is specified, and use whatever repository format that that uses via controldir.create_branch and create_repository. If a shared repository is available that is used preferentially. The created Branch object is returned. :param base: The URL to create the branch at. :param force_new_repo: If True a new repository is always created. :param format: If supplied, the format of branch to create. If not supplied, the default is used. """ controldir = klass.create(base, format) controldir._find_or_create_repository(force_new_repo) return controldir.create_branch() @classmethod def create_branch_convenience(klass, base, force_new_repo=False, force_new_tree=None, format=None, possible_transports=None): """Create a new ControlDir, Branch and Repository at the url 'base'. This is a convenience function - it will use an existing repository if possible, can be told explicitly whether to create a working tree or not. This will use the current default ControlDirFormat unless one is specified, and use whatever repository format that that uses via ControlDir.create_branch and create_repository. If a shared repository is available that is used preferentially. Whatever repository is used, its tree creation policy is followed. The created Branch object is returned. If a working tree cannot be made due to base not being a file:// url, no error is raised unless force_new_tree is True, in which case no data is created on disk and NotLocalUrl is raised. :param base: The URL to create the branch at. :param force_new_repo: If True a new repository is always created. :param force_new_tree: If True or False force creation of a tree or prevent such creation respectively. :param format: Override for the controldir format to create. :param possible_transports: An optional reusable transports list. 
""" if force_new_tree: # check for non local urls t = _mod_transport.get_transport(base, possible_transports) if not isinstance(t, local.LocalTransport): raise errors.NotLocalUrl(base) controldir = klass.create(base, format, possible_transports) repo = controldir._find_or_create_repository(force_new_repo) result = controldir.create_branch() if force_new_tree or (repo.make_working_trees() and force_new_tree is None): try: controldir.create_workingtree() except errors.NotLocalUrl: pass return result @classmethod def create_standalone_workingtree(klass, base, format=None): """Create a new ControlDir, WorkingTree, Branch and Repository at 'base'. 'base' must be a local path or a file:// url. This will use the current default ControlDirFormat unless one is specified, and use whatever repository format that that uses for bzrdirformat.create_workingtree, create_branch and create_repository. :param format: Override for the controldir format to create. :return: The WorkingTree object. """ t = _mod_transport.get_transport(base) if not isinstance(t, local.LocalTransport): raise errors.NotLocalUrl(base) controldir = klass.create_branch_and_repo(base, force_new_repo=True, format=format).bzrdir return controldir.create_workingtree() @classmethod def open_unsupported(klass, base): """Open a branch which is not supported.""" return klass.open(base, _unsupported=True) @classmethod def open(klass, base, possible_transports=None, probers=None, _unsupported=False): """Open an existing controldir, rooted at 'base' (url). :param _unsupported: a private parameter to the ControlDir class. """ t = _mod_transport.get_transport(base, possible_transports) return klass.open_from_transport(t, probers=probers, _unsupported=_unsupported) @classmethod def open_from_transport(klass, transport, _unsupported=False, probers=None): """Open a controldir within a particular directory. :param transport: Transport containing the controldir. :param _unsupported: private. """ for hook in klass.hooks['pre_open']: hook(transport) # Keep initial base since 'transport' may be modified while following # the redirections. base = transport.base def find_format(transport): return transport, ControlDirFormat.find_format(transport, probers=probers) def redirected(transport, e, redirection_notice): redirected_transport = transport._redirected_to(e.source, e.target) if redirected_transport is None: raise errors.NotBranchError(base) trace.note(gettext('{0} is{1} redirected to {2}').format( transport.base, e.permanently, redirected_transport.base)) return redirected_transport try: transport, format = _mod_transport.do_catching_redirections( find_format, transport, redirected) except errors.TooManyRedirections: raise errors.NotBranchError(base) format.check_support_status(_unsupported) return format.open(transport, _found=True) @classmethod def open_containing(klass, url, possible_transports=None): """Open an existing branch which contains url. :param url: url to search from. See open_containing_from_transport for more detail. """ transport = _mod_transport.get_transport(url, possible_transports) return klass.open_containing_from_transport(transport) @classmethod def open_containing_from_transport(klass, a_transport): """Open an existing branch which contains a_transport.base. This probes for a branch at a_transport, and searches upwards from there. Basically we keep looking up until we find the control directory or run into the root. If there isn't one, raises NotBranchError. 
If there is one and it is either an unrecognised format or an unsupported format, UnknownFormatError or UnsupportedFormatError are raised. If there is one, it is returned, along with the unused portion of url. :return: The ControlDir that contains the path, and a Unicode path for the rest of the URL. """ # this gets the normalised url back. I.e. '.' -> the full path. url = a_transport.base while True: try: result = klass.open_from_transport(a_transport) return result, urlutils.unescape(a_transport.relpath(url)) except errors.NotBranchError, e: pass except errors.PermissionDenied: pass try: new_t = a_transport.clone('..') except errors.InvalidURLJoin: # reached the root, whatever that may be raise errors.NotBranchError(path=url) if new_t.base == a_transport.base: # reached the root, whatever that may be raise errors.NotBranchError(path=url) a_transport = new_t @classmethod def open_tree_or_branch(klass, location): """Return the branch and working tree at a location. If there is no tree at the location, tree will be None. If there is no branch at the location, an exception will be raised :return: (tree, branch) """ controldir = klass.open(location) return controldir._get_tree_branch() @classmethod def open_containing_tree_or_branch(klass, location, possible_transports=None): """Return the branch and working tree contained by a location. Returns (tree, branch, relpath). If there is no tree at containing the location, tree will be None. If there is no branch containing the location, an exception will be raised relpath is the portion of the path that is contained by the branch. """ controldir, relpath = klass.open_containing(location, possible_transports=possible_transports) tree, branch = controldir._get_tree_branch() return tree, branch, relpath @classmethod def open_containing_tree_branch_or_repository(klass, location): """Return the working tree, branch and repo contained by a location. Returns (tree, branch, repository, relpath). If there is no tree containing the location, tree will be None. If there is no branch containing the location, branch will be None. If there is no repository containing the location, repository will be None. relpath is the portion of the path that is contained by the innermost ControlDir. If no tree, branch or repository is found, a NotBranchError is raised. """ controldir, relpath = klass.open_containing(location) try: tree, branch = controldir._get_tree_branch() except errors.NotBranchError: try: repo = controldir.find_repository() return None, None, repo, relpath except (errors.NoRepositoryPresent): raise errors.NotBranchError(location) return tree, branch, branch.repository, relpath @classmethod def create(klass, base, format=None, possible_transports=None): """Create a new ControlDir at the url 'base'. :param format: If supplied, the format of branch to create. If not supplied, the default is used. :param possible_transports: If supplied, a list of transports that can be reused to share a remote connection. 
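        Example -- an illustrative sketch, assuming '/tmp/example' is a
        writable local path with no existing control directory::

            from bzrlib.controldir import ControlDir

            cd = ControlDir.create('/tmp/example')
            cd.create_repository()
            branch = cd.create_branch()

            # Later, the branch (and tree, if any) containing a path can be
            # located again; relpath is the part below the control dir.
            tree, branch, relpath = ControlDir.open_containing_tree_or_branch(
                '/tmp/example')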
""" if klass is not ControlDir: raise AssertionError("ControlDir.create always creates the" "default format, not one of %r" % klass) t = _mod_transport.get_transport(base, possible_transports) t.ensure_base() if format is None: format = ControlDirFormat.get_default_format() return format.initialize_on_transport(t) class ControlDirHooks(hooks.Hooks): """Hooks for ControlDir operations.""" def __init__(self): """Create the default hooks.""" hooks.Hooks.__init__(self, "bzrlib.controldir", "ControlDir.hooks") self.add_hook('pre_open', "Invoked before attempting to open a ControlDir with the transport " "that the open will use.", (1, 14)) self.add_hook('post_repo_init', "Invoked after a repository has been initialized. " "post_repo_init is called with a " "bzrlib.controldir.RepoInitHookParams.", (2, 2)) # install the default hooks ControlDir.hooks = ControlDirHooks() class ControlComponentFormat(object): """A component that can live inside of a control directory.""" upgrade_recommended = False def get_format_description(self): """Return the short description for this format.""" raise NotImplementedError(self.get_format_description) def is_supported(self): """Is this format supported? Supported formats must be initializable and openable. Unsupported formats may not support initialization or committing or some other features depending on the reason for not being supported. """ return True def check_support_status(self, allow_unsupported, recommend_upgrade=True, basedir=None): """Give an error or warning on old formats. :param allow_unsupported: If true, allow opening formats that are strongly deprecated, and which may have limited functionality. :param recommend_upgrade: If true (default), warn the user through the ui object that they may wish to upgrade the object. """ if not allow_unsupported and not self.is_supported(): # see open_downlevel to open legacy branches. raise errors.UnsupportedFormatError(format=self) if recommend_upgrade and self.upgrade_recommended: ui.ui_factory.recommend_upgrade( self.get_format_description(), basedir) @classmethod def get_format_string(cls): raise NotImplementedError(cls.get_format_string) class ControlComponentFormatRegistry(registry.FormatRegistry): """A registry for control components (branch, workingtree, repository).""" def __init__(self, other_registry=None): super(ControlComponentFormatRegistry, self).__init__(other_registry) self._extra_formats = [] def register(self, format): """Register a new format.""" super(ControlComponentFormatRegistry, self).register( format.get_format_string(), format) def remove(self, format): """Remove a registered format.""" super(ControlComponentFormatRegistry, self).remove( format.get_format_string()) def register_extra(self, format): """Register a format that can not be used in a metadir. This is mainly useful to allow custom repository formats, such as older Bazaar formats and foreign formats, to be tested. """ self._extra_formats.append(registry._ObjectGetter(format)) def remove_extra(self, format): """Remove an extra format. """ self._extra_formats.remove(registry._ObjectGetter(format)) def register_extra_lazy(self, module_name, member_name): """Register a format lazily. 
""" self._extra_formats.append( registry._LazyObjectGetter(module_name, member_name)) def _get_extra(self): """Return all "extra" formats, not usable in meta directories.""" result = [] for getter in self._extra_formats: f = getter.get_obj() if callable(f): f = f() result.append(f) return result def _get_all(self): """Return all formats, even those not usable in metadirs. """ result = [] for name in self.keys(): fmt = self.get(name) if callable(fmt): fmt = fmt() result.append(fmt) return result + self._get_extra() def _get_all_modules(self): """Return a set of the modules providing objects.""" modules = set() for name in self.keys(): modules.add(self._get_module(name)) for getter in self._extra_formats: modules.add(getter.get_module()) return modules class Converter(object): """Converts a disk format object from one format to another.""" def convert(self, to_convert, pb): """Perform the conversion of to_convert, giving feedback via pb. :param to_convert: The disk object to convert. :param pb: a progress bar to use for progress information. """ def step(self, message): """Update the pb by a step.""" self.count +=1 self.pb.update(message, self.count, self.total) class ControlDirFormat(object): """An encapsulation of the initialization and open routines for a format. Formats provide three things: * An initialization routine, * a format string, * an open routine. Formats are placed in a dict by their format string for reference during controldir opening. These should be subclasses of ControlDirFormat for consistency. Once a format is deprecated, just deprecate the initialize and open methods on the format class. Do not deprecate the object, as the object will be created every system load. :cvar colocated_branches: Whether this formats supports colocated branches. :cvar supports_workingtrees: This control directory can co-exist with a working tree. """ _default_format = None """The default format used for new control directories.""" _server_probers = [] """The registered server format probers, e.g. RemoteBzrProber. This is a list of Prober-derived classes. """ _probers = [] """The registered format probers, e.g. BzrProber. This is a list of Prober-derived classes. """ colocated_branches = False """Whether co-located branches are supported for this control dir format. """ supports_workingtrees = True """Whether working trees can exist in control directories of this format. """ fixed_components = False """Whether components can not change format independent of the control dir. """ upgrade_recommended = False """Whether an upgrade from this format is recommended.""" def get_format_description(self): """Return the short description for this format.""" raise NotImplementedError(self.get_format_description) def get_converter(self, format=None): """Return the converter to use to convert controldirs needing converts. This returns a bzrlib.controldir.Converter object. This should return the best upgrader to step this format towards the current default format. In the case of plugins we can/should provide some means for them to extend the range of returnable converters. :param format: Optional format to override the default format of the library. """ raise NotImplementedError(self.get_converter) def is_supported(self): """Is this format supported? Supported formats must be openable. Unsupported formats may not support initialization or committing or some other features depending on the reason for not being supported. 
""" return True def is_initializable(self): """Whether new control directories of this format can be initialized. """ return self.is_supported() def check_support_status(self, allow_unsupported, recommend_upgrade=True, basedir=None): """Give an error or warning on old formats. :param allow_unsupported: If true, allow opening formats that are strongly deprecated, and which may have limited functionality. :param recommend_upgrade: If true (default), warn the user through the ui object that they may wish to upgrade the object. """ if not allow_unsupported and not self.is_supported(): # see open_downlevel to open legacy branches. raise errors.UnsupportedFormatError(format=self) if recommend_upgrade and self.upgrade_recommended: ui.ui_factory.recommend_upgrade( self.get_format_description(), basedir) def same_model(self, target_format): return (self.repository_format.rich_root_data == target_format.rich_root_data) @classmethod def register_format(klass, format): """Register a format that does not use '.bzr' for its control dir. """ raise errors.BzrError("ControlDirFormat.register_format() has been " "removed in Bazaar 2.4. Please upgrade your plugins.") @classmethod def register_prober(klass, prober): """Register a prober that can look for a control dir. """ klass._probers.append(prober) @classmethod def unregister_prober(klass, prober): """Unregister a prober. """ klass._probers.remove(prober) @classmethod def register_server_prober(klass, prober): """Register a control format prober for client-server environments. These probers will be used before ones registered with register_prober. This gives implementations that decide to the chance to grab it before anything looks at the contents of the format file. """ klass._server_probers.append(prober) def __str__(self): # Trim the newline return self.get_format_description().rstrip() @classmethod def all_probers(klass): return klass._server_probers + klass._probers @classmethod def known_formats(klass): """Return all the known formats. """ result = set() for prober_kls in klass.all_probers(): result.update(prober_kls.known_formats()) return result @classmethod def find_format(klass, transport, probers=None): """Return the format present at transport.""" if probers is None: probers = klass.all_probers() for prober_kls in probers: prober = prober_kls() try: return prober.probe_transport(transport) except errors.NotBranchError: # this format does not find a control dir here. pass raise errors.NotBranchError(path=transport.base) def initialize(self, url, possible_transports=None): """Create a control dir at this url and return an opened copy. While not deprecated, this method is very specific and its use will lead to many round trips to setup a working environment. See initialize_on_transport_ex for a [nearly] all-in-one method. Subclasses should typically override initialize_on_transport instead of this method. """ return self.initialize_on_transport( _mod_transport.get_transport(url, possible_transports)) def initialize_on_transport(self, transport): """Initialize a new controldir in the base directory of a Transport.""" raise NotImplementedError(self.initialize_on_transport) def initialize_on_transport_ex(self, transport, use_existing_dir=False, create_prefix=False, force_new_repo=False, stacked_on=None, stack_on_pwd=None, repo_format_name=None, make_working_trees=None, shared_repo=False, vfs_only=False): """Create this format on transport. The directory to initialize will be created. 
:param force_new_repo: Do not use a shared repository for the target, even if one is available. :param create_prefix: Create any missing directories leading up to to_transport. :param use_existing_dir: Use an existing directory if one exists. :param stacked_on: A url to stack any created branch on, None to follow any target stacking policy. :param stack_on_pwd: If stack_on is relative, the location it is relative to. :param repo_format_name: If non-None, a repository will be made-or-found. Should none be found, or if force_new_repo is True the repo_format_name is used to select the format of repository to create. :param make_working_trees: Control the setting of make_working_trees for a new shared repository when one is made. None to use whatever default the format has. :param shared_repo: Control whether made repositories are shared or not. :param vfs_only: If True do not attempt to use a smart server :return: repo, controldir, require_stacking, repository_policy. repo is None if none was created or found, controldir is always valid. require_stacking is the result of examining the stacked_on parameter and any stacking policy found for the target. """ raise NotImplementedError(self.initialize_on_transport_ex) def network_name(self): """A simple byte string uniquely identifying this format for RPC calls. Bzr control formats use this disk format string to identify the format over the wire. Its possible that other control formats have more complex detection requirements, so we permit them to use any unique and immutable string they desire. """ raise NotImplementedError(self.network_name) def open(self, transport, _found=False): """Return an instance of this format for the dir transport points at. """ raise NotImplementedError(self.open) @classmethod def _set_default_format(klass, format): """Set default format (for testing behavior of defaults only)""" klass._default_format = format @classmethod def get_default_format(klass): """Return the current default format.""" return klass._default_format def supports_transport(self, transport): """Check if this format can be opened over a particular transport. """ raise NotImplementedError(self.supports_transport) class Prober(object): """Abstract class that can be used to detect a particular kind of control directory. At the moment this just contains a single method to probe a particular transport, but it may be extended in the future to e.g. avoid multiple levels of probing for Subversion repositories. See BzrProber and RemoteBzrProber in bzrlib.bzrdir for the probers that detect .bzr/ directories and Bazaar smart servers, respectively. Probers should be registered using the register_server_prober or register_prober methods on ControlDirFormat. """ def probe_transport(self, transport): """Return the controldir style format present in a directory. :raise UnknownFormatError: If a control dir was found but is in an unknown format. :raise NotBranchError: If no control directory was found. :return: A ControlDirFormat instance. """ raise NotImplementedError(self.probe_transport) @classmethod def known_formats(klass): """Return the control dir formats known by this prober. Multiple probers can return the same formats, so this should return a set. :return: A set of known formats. 
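        Illustrative sketch of a minimal prober (``ExampleDirFormat`` is a
        hypothetical ControlDirFormat subclass, not part of bzrlib)::

            class ExampleProber(Prober):

                def probe_transport(self, transport):
                    if transport.has('.example'):
                        return ExampleDirFormat()
                    raise errors.NotBranchError(path=transport.base)

                @classmethod
                def known_formats(cls):
                    return set([ExampleDirFormat()])

            ControlDirFormat.register_prober(ExampleProber)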
""" raise NotImplementedError(klass.known_formats) class ControlDirFormatInfo(object): def __init__(self, native, deprecated, hidden, experimental): self.deprecated = deprecated self.native = native self.hidden = hidden self.experimental = experimental class ControlDirFormatRegistry(registry.Registry): """Registry of user-selectable ControlDir subformats. Differs from ControlDirFormat._formats in that it provides sub-formats, e.g. BzrDirMeta1 with weave repository. Also, it's more user-oriented. """ def __init__(self): """Create a ControlDirFormatRegistry.""" self._aliases = set() self._registration_order = list() super(ControlDirFormatRegistry, self).__init__() def aliases(self): """Return a set of the format names which are aliases.""" return frozenset(self._aliases) def register(self, key, factory, help, native=True, deprecated=False, hidden=False, experimental=False, alias=False): """Register a ControlDirFormat factory. The factory must be a callable that takes one parameter: the key. It must produce an instance of the ControlDirFormat when called. This function mainly exists to prevent the info object from being supplied directly. """ registry.Registry.register(self, key, factory, help, ControlDirFormatInfo(native, deprecated, hidden, experimental)) if alias: self._aliases.add(key) self._registration_order.append(key) def register_lazy(self, key, module_name, member_name, help, native=True, deprecated=False, hidden=False, experimental=False, alias=False): registry.Registry.register_lazy(self, key, module_name, member_name, help, ControlDirFormatInfo(native, deprecated, hidden, experimental)) if alias: self._aliases.add(key) self._registration_order.append(key) def set_default(self, key): """Set the 'default' key to be a clone of the supplied key. This method must be called once and only once. """ registry.Registry.register(self, 'default', self.get(key), self.get_help(key), info=self.get_info(key)) self._aliases.add('default') def set_default_repository(self, key): """Set the FormatRegistry default and Repository default. This is a transitional method while Repository.set_default_format is deprecated. """ if 'default' in self: self.remove('default') self.set_default(key) format = self.get('default')() def make_bzrdir(self, key): return self.get(key)() def help_topic(self, topic): output = "" default_realkey = None default_help = self.get_help('default') help_pairs = [] for key in self._registration_order: if key == 'default': continue help = self.get_help(key) if help == default_help: default_realkey = key else: help_pairs.append((key, help)) def wrapped(key, help, info): if info.native: help = '(native) ' + help return ':%s:\n%s\n\n' % (key, textwrap.fill(help, initial_indent=' ', subsequent_indent=' ', break_long_words=False)) if default_realkey is not None: output += wrapped(default_realkey, '(default) %s' % default_help, self.get_info('default')) deprecated_pairs = [] experimental_pairs = [] for key, help in help_pairs: info = self.get_info(key) if info.hidden: continue elif info.deprecated: deprecated_pairs.append((key, help)) elif info.experimental: experimental_pairs.append((key, help)) else: output += wrapped(key, help, info) output += "\nSee :doc:`formats-help` for more about storage formats." 
other_output = "" if len(experimental_pairs) > 0: other_output += "Experimental formats are shown below.\n\n" for key, help in experimental_pairs: info = self.get_info(key) other_output += wrapped(key, help, info) else: other_output += \ "No experimental formats are available.\n\n" if len(deprecated_pairs) > 0: other_output += "\nDeprecated formats are shown below.\n\n" for key, help in deprecated_pairs: info = self.get_info(key) other_output += wrapped(key, help, info) else: other_output += \ "\nNo deprecated formats are available.\n\n" other_output += \ "\nSee :doc:`formats-help` for more about storage formats." if topic == 'other-formats': return other_output else: return output class RepoInitHookParams(object): """Object holding parameters passed to `*_repo_init` hooks. There are 4 fields that hooks may wish to access: :ivar repository: Repository created :ivar format: Repository format :ivar bzrdir: The controldir for the repository :ivar shared: The repository is shared """ def __init__(self, repository, format, controldir, shared): """Create a group of RepoInitHook parameters. :param repository: Repository created :param format: Repository format :param controldir: The controldir for the repository :param shared: The repository is shared """ self.repository = repository self.format = format self.bzrdir = controldir self.shared = shared def __eq__(self, other): return self.__dict__ == other.__dict__ def __repr__(self): if self.repository: return "<%s for %s>" % (self.__class__.__name__, self.repository) else: return "<%s for %s>" % (self.__class__.__name__, self.bzrdir) # Please register new formats after old formats so that formats # appear in chronological order and format descriptions can build # on previous ones. format_registry = ControlDirFormatRegistry() network_format_registry = registry.FormatRegistry() """Registry of formats indexed by their network name. The network name for a ControlDirFormat is an identifier that can be used when referring to formats with smart server operations. See ControlDirFormat.network_name() for more detail. """ bzr-2.7.0/bzrlib/counted_lock.py0000644000000000000000000000713211673635356014774 0ustar 00000000000000# Copyright (C) 2007, 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Counted lock class""" from __future__ import absolute_import from bzrlib import ( errors, ) class CountedLock(object): """Decorator around a lock that makes it reentrant. This can be used with any object that provides a basic Lock interface, including LockDirs and OS file locks. :ivar _token: While a write lock is held, this is the token for it. 
""" def __init__(self, real_lock): self._real_lock = real_lock self._lock_mode = None self._lock_count = 0 def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self._real_lock) def break_lock(self): self._real_lock.break_lock() self._lock_mode = None self._lock_count = 0 def get_physical_lock_status(self): """Return physical lock status. Returns true if a lock is held on the transport. If no lock is held, or the underlying locking mechanism does not support querying lock status, false is returned. """ try: return self._real_lock.peek() is not None except NotImplementedError: return False def is_locked(self): return self._lock_mode is not None def lock_read(self): """Acquire the lock in read mode. If the lock is already held in either read or write mode this increments the count and succeeds. If the lock is not already held, it is taken in read mode. """ if self._lock_mode: self._lock_count += 1 else: self._real_lock.lock_read() self._lock_count = 1 self._lock_mode = 'r' def lock_write(self, token=None): """Acquire the lock in write mode. If the lock was originally acquired in read mode this will fail. :param token: If given and the lock is already held, then validate that we already hold the real lock with this token. :returns: The token from the underlying lock. """ if self._lock_count == 0: self._token = self._real_lock.lock_write(token=token) self._lock_mode = 'w' self._lock_count += 1 return self._token elif self._lock_mode != 'w': raise errors.ReadOnlyError(self) else: self._real_lock.validate_token(token) self._lock_count += 1 return self._token def unlock(self): if self._lock_count == 0: raise errors.LockNotHeld(self) elif self._lock_count == 1: # these are decremented first; if we fail to unlock the most # reasonable assumption is that we still don't have the lock # anymore self._lock_mode = None self._lock_count -= 1 self._real_lock.unlock() else: self._lock_count -= 1 bzr-2.7.0/bzrlib/crash.py0000644000000000000000000002254112102132732013376 0ustar 00000000000000# Copyright (C) 2009-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Handling and reporting crashes. A crash is an exception propagated up almost to the top level of Bazaar. If we have apport , we store a report of the crash using apport into its /var/crash spool directory, from where the user can either manually send it to Launchpad. In some cases (at least Ubuntu development releases), Apport may pop up a window asking if they want to send it. Without apport, we just write a crash report to stderr and the user can report this manually if the wish. We never send crash data across the network without user opt-in. In principle apport can run on any platform though as of Feb 2010 there seem to be some portability bugs. To force this off in bzr turn set APPORT_DISABLE in the environment or -Dno_apport. 
""" from __future__ import absolute_import # for interactive testing, try the 'bzr assert-fail' command # or see http://code.launchpad.net/~mbp/bzr/bzr-fail # # to test with apport it's useful to set # export APPORT_IGNORE_OBSOLETE_PACKAGES=1 import os import platform import pprint import sys import time from StringIO import StringIO import bzrlib from bzrlib import ( config, debug, osutils, plugin, trace, ) def report_bug(exc_info, stderr): if ('no_apport' in debug.debug_flags) or \ os.environ.get('APPORT_DISABLE', None): return report_bug_legacy(exc_info, stderr) try: if report_bug_to_apport(exc_info, stderr): # wrote a file; if None then report the old way return except ImportError, e: trace.mutter("couldn't find apport bug-reporting library: %s" % e) except Exception, e: # this should only happen if apport is installed but it didn't # work, eg because of an io error writing the crash file trace.mutter("bzr: failed to report crash using apport: %r" % e) trace.log_exception_quietly() return report_bug_legacy(exc_info, stderr) def report_bug_legacy(exc_info, err_file): """Report a bug by just printing a message to the user.""" trace.print_exception(exc_info, err_file) err_file.write('\n') import textwrap def print_wrapped(l): err_file.write(textwrap.fill(l, width=78, subsequent_indent=' ') + '\n') print_wrapped('bzr %s on python %s (%s)\n' % \ (bzrlib.__version__, bzrlib._format_version_tuple(sys.version_info), platform.platform(aliased=1))) print_wrapped('arguments: %r\n' % sys.argv) print_wrapped(textwrap.fill( 'plugins: ' + plugin.format_concise_plugin_list(), width=78, subsequent_indent=' ', ) + '\n') print_wrapped( 'encoding: %r, fsenc: %r, lang: %r\n' % ( osutils.get_user_encoding(), sys.getfilesystemencoding(), os.environ.get('LANG'))) # We used to show all the plugins here, but it's too verbose. err_file.write( "\n" "*** Bazaar has encountered an internal error. This probably indicates a\n" " bug in Bazaar. You can help us fix it by filing a bug report at\n" " https://bugs.launchpad.net/bzr/+filebug\n" " including this traceback and a description of the problem.\n" ) def report_bug_to_apport(exc_info, stderr): """Report a bug to apport for optional automatic filing. :returns: The name of the crash file, or None if we didn't write one. """ # this function is based on apport_package_hook.py, but omitting some of the # Ubuntu-specific policy about what to report and when # This import is apparently not used, but we're doing it so that if the # import fails, the exception will be caught at a higher level and we'll # report the error by other means. import apport crash_filename = _write_apport_report_to_file(exc_info) if crash_filename is None: stderr.write("\n" "apport is set to ignore crashes in this version of bzr.\n" ) else: trace.print_exception(exc_info, stderr) stderr.write("\n" "You can report this problem to Bazaar's developers by running\n" " apport-bug %s\n" "if a bug-reporting window does not automatically appear.\n" % (crash_filename)) # XXX: on Windows, Mac, and other platforms where we might have the # apport libraries but not have an apport always running, we could # synchronously file now return crash_filename def _write_apport_report_to_file(exc_info): import traceback from apport.report import Report exc_type, exc_object, exc_tb = exc_info pr = Report() # add_proc_info sets the ExecutablePath, InterpreterPath, etc. pr.add_proc_info() # It also adds ProcMaps which for us is rarely useful and mostly noise, so # let's remove it. 
del pr['ProcMaps'] pr.add_user_info() # Package and SourcePackage are needed so that apport will report about even # non-packaged versions of bzr; also this reports on their packaged # dependencies which is useful. pr['SourcePackage'] = 'bzr' pr['Package'] = 'bzr' pr['CommandLine'] = pprint.pformat(sys.argv) pr['BzrVersion'] = bzrlib.__version__ pr['PythonVersion'] = bzrlib._format_version_tuple(sys.version_info) pr['Platform'] = platform.platform(aliased=1) pr['UserEncoding'] = osutils.get_user_encoding() pr['FileSystemEncoding'] = sys.getfilesystemencoding() pr['Locale'] = os.environ.get('LANG', 'C') pr['BzrPlugins'] = _format_plugin_list() pr['PythonLoadedModules'] = _format_module_list() pr['BzrDebugFlags'] = pprint.pformat(debug.debug_flags) # actually we'd rather file directly against the upstream product, but # apport does seem to count on there being one in there; we might need to # redirect it elsewhere anyhow pr['SourcePackage'] = 'bzr' pr['Package'] = 'bzr' # tell apport to file directly against the bzr package using # # # XXX: unfortunately apport may crash later if the crashdb definition # file isn't present pr['CrashDb'] = 'bzr' tb_file = StringIO() traceback.print_exception(exc_type, exc_object, exc_tb, file=tb_file) pr['Traceback'] = tb_file.getvalue() _attach_log_tail(pr) # We want to use the 'bzr' crashdb so that it gets sent directly upstream, # which is a reasonable default for most internal errors. However, if we # set it here then apport will crash later if it doesn't know about that # crashdb. Instead, we rely on the bzr package installing both a # source hook telling crashes to go to this crashdb, and a crashdb # configuration describing it. # these may contain some sensitive info (smtp_passwords) # TODO: strip that out and attach the rest # #attach_file_if_exists(report, # os.path.join(dot_bzr, 'bazaar.conf', 'BzrConfig') #attach_file_if_exists(report, # os.path.join(dot_bzr, 'locations.conf', 'BzrLocations') # strip username, hostname, etc pr.anonymize() if pr.check_ignored(): # eg configured off in ~/.apport-ignore.xml return None else: crash_file_name, crash_file = _open_crash_file() pr.write(crash_file) crash_file.close() return crash_file_name def _attach_log_tail(pr): try: bzr_log = open(trace._get_bzr_log_filename(), 'rt') except (IOError, OSError), e: pr['BzrLogTail'] = repr(e) return try: lines = bzr_log.readlines() pr['BzrLogTail'] = ''.join(lines[-40:]) finally: bzr_log.close() def _open_crash_file(): crash_dir = config.crash_dir() if not osutils.isdir(crash_dir): # on unix this should be /var/crash and should already exist; on # Windows or if it's manually configured it might need to be created, # and then it should be private os.makedirs(crash_dir, mode=0600) date_string = time.strftime('%Y-%m-%dT%H:%M', time.gmtime()) # XXX: getuid doesn't work on win32, but the crash directory is per-user if sys.platform == 'win32': user_part = '' else: user_part = '.%d' % os.getuid() filename = osutils.pathjoin( crash_dir, 'bzr%s.%s.crash' % ( user_part, date_string)) # be careful here that people can't play tmp-type symlink mischief in the # world-writable directory return filename, os.fdopen( os.open(filename, os.O_WRONLY|os.O_CREAT|os.O_EXCL, 0600), 'wb') def _format_plugin_list(): return ''.join(plugin.describe_plugins(show_paths=True)) def _format_module_list(): return pprint.pformat(sys.modules) bzr-2.7.0/bzrlib/debug.py0000644000000000000000000000346311673635356013414 0ustar 00000000000000# Copyright (C) 2005, 2006, 2009 Canonical Ltd # # This program is free 
software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Set of flags that enable different debug behaviour. These are set with eg ``-Dlock`` on the bzr command line or in ~/.bazaar/bazaar.conf debug_flags. See `bzr help debug-flags` or `bzrlib/help_topics/en/debug-flags.txt` for a list of the available options. """ from __future__ import absolute_import debug_flags = set() def set_debug_flags_from_config(): """Turn on debug flags based on the global configuration""" from bzrlib import config c = config.GlobalStack() for f in c.get('debug_flags'): debug_flags.add(f) def set_trace(): """Pdb using original stdin and stdout. When debugging blackbox tests, sys.stdin and sys.stdout are captured for test purposes and cannot be used for interactive debugging. This class uses the origianl stdin/stdout to allow such use. Instead of doing: import pdb; pdb.set_trace() you can do: from bzrlib import debug; debug.set_trace() """ import pdb import sys pdb.Pdb(stdin=sys.__stdin__, stdout=sys.__stdout__ ).set_trace(sys._getframe().f_back) bzr-2.7.0/bzrlib/decorators.py0000644000000000000000000002551611673360271014465 0ustar 00000000000000# Copyright (C) 2006-2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import __all__ = ['needs_read_lock', 'needs_write_lock', 'use_fast_decorators', 'use_pretty_decorators', ] import sys from bzrlib import trace def _get_parameters(func): """Recreate the parameters for a function using introspection. :return: (function_params, calling_params, default_values) function_params: is a string representing the parameters of the function. (such as "a, b, c=None, d=1") This is used in the function declaration. calling_params: is another string representing how you would call the function with the correct parameters. (such as "a, b, c=c, d=d") Assuming you used function_params in the function declaration, this is the parameters to put in the function call. default_values_block: a dict with the default values to be passed as the scope for the 'exec' statement. For example: def wrapper(%(function_params)s): return original(%(calling_params)s) """ # "import inspect" should stay in local scope. 'inspect' takes a long time # to import the first time. 
And since we don't always need it, don't import # it globally. import inspect args, varargs, varkw, defaults = inspect.getargspec(func) defaults_dict = {} def formatvalue(value): default_name = '__default_%d' % len(defaults_dict) defaults_dict[default_name] = value return '=' + default_name formatted = inspect.formatargspec(args, varargs=varargs, varkw=varkw, defaults=defaults, formatvalue=formatvalue) if defaults is None: args_passed = args else: first_default = len(args) - len(defaults) args_passed = args[:first_default] for arg in args[first_default:]: args_passed.append("%s=%s" % (arg, arg)) if varargs is not None: args_passed.append('*' + varargs) if varkw is not None: args_passed.append('**' + varkw) args_passed = ', '.join(args_passed) return formatted[1:-1], args_passed, defaults_dict def _pretty_needs_read_lock(unbound): """Decorate unbound to take out and release a read lock. This decorator can be applied to methods of any class with lock_read() and unlock() methods. Typical usage: class Branch(...): @needs_read_lock def branch_method(self, ...): stuff """ # This compiles a function with a similar name, but wrapped with # lock_read/unlock calls. We use dynamic creation, because we need the # internal name of the function to be modified so that --lsprof will see # the correct name. # TODO: jam 20070111 Modify this template so that the generated function # has the same argument signature as the original function, which # will help commands like epydoc. # This seems possible by introspecting foo.func_defaults, and # foo.func_code.co_argcount and foo.func_code.co_varnames template = """\ def %(name)s_read_locked(%(params)s): self.lock_read() try: result = unbound(%(passed_params)s) except: import sys exc_info = sys.exc_info() try: self.unlock() finally: try: raise exc_info[0], exc_info[1], exc_info[2] finally: del exc_info else: self.unlock() return result read_locked = %(name)s_read_locked """ params, passed_params, defaults_dict = _get_parameters(unbound) variables = {'name':unbound.__name__, 'params':params, 'passed_params':passed_params, } func_def = template % variables scope = dict(defaults_dict) scope['unbound'] = unbound exec func_def in scope read_locked = scope['read_locked'] read_locked.__doc__ = unbound.__doc__ read_locked.__name__ = unbound.__name__ return read_locked def _fast_needs_read_lock(unbound): """Decorate unbound to take out and release a read lock. This decorator can be applied to methods of any class with lock_read() and unlock() methods. 
Typical usage: class Branch(...): @needs_read_lock def branch_method(self, ...): stuff """ def read_locked(self, *args, **kwargs): self.lock_read() try: result = unbound(self, *args, **kwargs) except: import sys exc_info = sys.exc_info() try: self.unlock() finally: try: raise exc_info[0], exc_info[1], exc_info[2] finally: del exc_info else: self.unlock() return result read_locked.__doc__ = unbound.__doc__ read_locked.__name__ = unbound.__name__ return read_locked def _pretty_needs_write_lock(unbound): """Decorate unbound to take out and release a write lock.""" template = """\ def %(name)s_write_locked(%(params)s): self.lock_write() try: result = unbound(%(passed_params)s) except: import sys exc_info = sys.exc_info() try: self.unlock() finally: try: raise exc_info[0], exc_info[1], exc_info[2] finally: del exc_info else: self.unlock() return result write_locked = %(name)s_write_locked """ params, passed_params, defaults_dict = _get_parameters(unbound) variables = {'name':unbound.__name__, 'params':params, 'passed_params':passed_params, } func_def = template % variables scope = dict(defaults_dict) scope['unbound'] = unbound exec func_def in scope write_locked = scope['write_locked'] write_locked.__doc__ = unbound.__doc__ write_locked.__name__ = unbound.__name__ return write_locked def _fast_needs_write_lock(unbound): """Decorate unbound to take out and release a write lock.""" def write_locked(self, *args, **kwargs): self.lock_write() try: result = unbound(self, *args, **kwargs) except: exc_info = sys.exc_info() try: self.unlock() finally: try: raise exc_info[0], exc_info[1], exc_info[2] finally: del exc_info else: self.unlock() return result write_locked.__doc__ = unbound.__doc__ write_locked.__name__ = unbound.__name__ return write_locked def only_raises(*errors): """Make a decorator that will only allow the given error classes to be raised. All other errors will be logged and then discarded. Typical use is something like:: @only_raises(LockNotHeld, LockBroken) def unlock(self): # etc """ def decorator(unbound): def wrapped(*args, **kwargs): try: return unbound(*args, **kwargs) except errors: raise except: trace.mutter('Error suppressed by only_raises:') trace.log_exception_quietly() wrapped.__doc__ = unbound.__doc__ wrapped.__name__ = unbound.__name__ return wrapped return decorator # Default is more functionality, 'bzr' the commandline will request fast # versions. needs_read_lock = _pretty_needs_read_lock needs_write_lock = _pretty_needs_write_lock def use_fast_decorators(): """Change the default decorators to be fast loading ones. The alternative is to have decorators that do more work to produce nice-looking decorated functions, but this slows startup time. """ global needs_read_lock, needs_write_lock needs_read_lock = _fast_needs_read_lock needs_write_lock = _fast_needs_write_lock def use_pretty_decorators(): """Change the default decorators to be pretty ones.""" global needs_read_lock, needs_write_lock needs_read_lock = _pretty_needs_read_lock needs_write_lock = _pretty_needs_write_lock # This implementation of cachedproperty is copied from Launchpad's # canonical.launchpad.cachedproperty module (with permission from flacoste) # -- spiv & vila 100120 def cachedproperty(attrname_or_fn): """A decorator for methods that makes them properties with their return value cached. The value is cached on the instance, using the attribute name provided. If you don't provide a name, the mangled name of the property is used. >>> class CachedPropertyTest(object): ... ... 
@cachedproperty('_foo_cache') ... def foo(self): ... print 'foo computed' ... return 23 ... ... @cachedproperty ... def bar(self): ... print 'bar computed' ... return 69 >>> cpt = CachedPropertyTest() >>> getattr(cpt, '_foo_cache', None) is None True >>> cpt.foo foo computed 23 >>> cpt.foo 23 >>> cpt._foo_cache 23 >>> cpt.bar bar computed 69 >>> cpt._bar_cached_value 69 """ if isinstance(attrname_or_fn, basestring): attrname = attrname_or_fn return _CachedPropertyForAttr(attrname) else: fn = attrname_or_fn attrname = '_%s_cached_value' % fn.__name__ return _CachedProperty(attrname, fn) class _CachedPropertyForAttr(object): def __init__(self, attrname): self.attrname = attrname def __call__(self, fn): return _CachedProperty(self.attrname, fn) class _CachedProperty(object): def __init__(self, attrname, fn): self.fn = fn self.attrname = attrname self.marker = object() def __get__(self, inst, cls=None): if inst is None: return self cachedresult = getattr(inst, self.attrname, self.marker) if cachedresult is self.marker: result = self.fn(inst) setattr(inst, self.attrname, result) return result else: return cachedresult bzr-2.7.0/bzrlib/delta.h0000644000000000000000000001304111564232056013174 0ustar 00000000000000/* * delta.h: headers for delta functionality * * Adapted from GIT for Bazaar by * John Arbash Meinel (C) 2009 * * This code is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ #ifndef DELTA_H #define DELTA_H /* opaque object for delta index */ struct delta_index; struct source_info { const void *buf; /* Pointer to the beginning of source data */ unsigned long size; /* Total length of source data */ unsigned long agg_offset; /* Start of source data as part of the aggregate source */ }; /* result type for functions that have multiple failure modes */ typedef enum { DELTA_OK, /* Success */ DELTA_OUT_OF_MEMORY, /* Could not allocate required memory */ DELTA_INDEX_NEEDED, /* A delta_index must be passed */ DELTA_SOURCE_EMPTY, /* A source_info had no content */ DELTA_SOURCE_BAD, /* A source_info had invalid or corrupt content */ DELTA_BUFFER_EMPTY, /* A buffer pointer and size */ DELTA_SIZE_TOO_BIG, /* Delta data is larger than the max requested */ } delta_result; /* * create_delta_index: compute index data from given buffer * * Returns a delta_result status, when DELTA_OK then *fresh is set to a struct * delta_index that should be passed to subsequent create_delta() calls, or to * free_delta_index(). Other values are a failure, and *fresh is unset. * The given buffer must not be freed nor altered before free_delta_index() is * called. The resultant struct must be freed using free_delta_index(). * * :param max_bytes_to_index: Limit the number of regions to sample to this * amount of text. We will store at most max_bytes_to_index / RABIN_WINDOW * pointers into the source text. Useful if src can be unbounded in size, * and you are willing to trade match accuracy for peak memory. */ extern delta_result create_delta_index(const struct source_info *src, struct delta_index *old, struct delta_index **fresh, int max_bytes_to_index); /* * create_delta_index_from_delta: compute index data from given buffer * * Returns a delta_result status, when DELTA_OK then *fresh is set to a struct * delta_index that should be passed to subsequent create_delta() calls, or to * free_delta_index(). Other values are a failure, and *fresh is unset. 
* The bytes must be in the form of a delta structure, as generated by * create_delta(). The generated index will only index the insert bytes, and * not any of the control structures. */ extern delta_result create_delta_index_from_delta(const struct source_info *delta, struct delta_index *old, struct delta_index **fresh); /* * free_delta_index: free the index created by create_delta_index() * * Given pointer must be what create_delta_index() returned, or NULL. */ extern void free_delta_index(struct delta_index *index); /* * sizeof_delta_index: returns memory usage of delta index * * Given pointer must be what create_delta_index() returned, or NULL. */ extern unsigned long sizeof_delta_index(struct delta_index *index); /* * create_delta: create a delta from given index for the given buffer * * This function may be called multiple times with different buffers using * the same delta_index pointer. If max_delta_size is non-zero and the * resulting delta is to be larger than max_delta_size then DELTA_SIZE_TOO_BIG * is returned. Otherwise on success, DELTA_OK is returned and *delta_data is * set to a new buffer with the delta data and *delta_size is updated with its * size. That buffer must be freed by the caller. */ extern delta_result create_delta(const struct delta_index *index, const void *buf, unsigned long bufsize, unsigned long *delta_size, unsigned long max_delta_size, void **delta_data); /* the smallest possible delta size is 3 bytes * Target size, Copy command, Copy length */ #define DELTA_SIZE_MIN 3 /* * This must be called twice on the delta data buffer, first to get the * expected source buffer size, and again to get the target buffer size. */ static unsigned long get_delta_hdr_size(unsigned char **datap, const unsigned char *top) { unsigned char *data = *datap; unsigned char cmd; unsigned long size = 0; int i = 0; do { cmd = *data++; size |= (cmd & ~0x80) << i; i += 7; } while (cmd & 0x80 && data < top); *datap = data; return size; } /* * Return the basic information about a given delta index. * :param index: The delta_index object * :param pos: The offset in the entry list. Start at 0, and walk until you get * 0 as a return code. * :param global_offset: return value, distance to the beginning of all sources * :param hash_val: return value, the RABIN hash associated with this pointer * :param hash_offset: Location for this entry in the hash array. * :return: 1 if pos != -1 (there was data produced) */ extern int get_entry_summary(const struct delta_index *index, int pos, unsigned int *text_offset, unsigned int *hash_val); /* * Determine what entry index->hash[X] points to. */ extern int get_hash_offset(const struct delta_index *index, int pos, unsigned int *entry_offset); /* * Compute the rabin_hash of the given data, it is assumed the data is at least * RABIN_WINDOW wide (16 bytes). */ extern unsigned int rabin_hash(const unsigned char *data); #endif bzr-2.7.0/bzrlib/delta.py0000644000000000000000000004266111673360271013411 0ustar 00000000000000# Copyright (C) 2005-2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
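# Illustrative sketch (not part of the original source): the delta header
# decoding done by get_delta_hdr_size() in delta.h above -- a little-endian
# varint storing 7 bits per byte, with the high bit marking continuation --
# can be expressed in Python roughly as:
#
#     def get_delta_hdr_size(data, offset=0):
#         size = shift = 0
#         while True:
#             cmd = ord(data[offset]); offset += 1
#             size |= (cmd & 0x7f) << shift
#             shift += 7
#             if not (cmd & 0x80) or offset >= len(data):
#                 break
#         return size, offset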
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import from bzrlib import ( osutils, ) from bzrlib.trace import is_quiet class TreeDelta(object): """Describes changes from one tree to another. Contains seven lists: added (path, id, kind) removed (path, id, kind) renamed (oldpath, newpath, id, kind, text_modified, meta_modified) kind_changed (path, id, old_kind, new_kind) modified (path, id, kind, text_modified, meta_modified) unchanged (path, id, kind) unversioned (path, None, kind) Each id is listed only once. Files that are both modified and renamed are listed only in renamed, with the text_modified flag true. The text_modified applies either to the content of the file or the target of the symbolic link, depending of the kind of file. Files are only considered renamed if their name has changed or their parent directory has changed. Renaming a directory does not count as renaming all its contents. The lists are normally sorted when the delta is created. """ def __init__(self): self.added = [] self.removed = [] self.renamed = [] self.kind_changed = [] self.modified = [] self.unchanged = [] self.unversioned = [] self.missing = [] def __eq__(self, other): if not isinstance(other, TreeDelta): return False return self.added == other.added \ and self.removed == other.removed \ and self.renamed == other.renamed \ and self.modified == other.modified \ and self.unchanged == other.unchanged \ and self.kind_changed == other.kind_changed \ and self.unversioned == other.unversioned def __ne__(self, other): return not (self == other) def __repr__(self): return "TreeDelta(added=%r, removed=%r, renamed=%r," \ " kind_changed=%r, modified=%r, unchanged=%r," \ " unversioned=%r)" % (self.added, self.removed, self.renamed, self.kind_changed, self.modified, self.unchanged, self.unversioned) def has_changed(self): return bool(self.modified or self.added or self.removed or self.renamed or self.kind_changed) def touches_file_id(self, file_id): """Return True if file_id is modified by this delta.""" for l in self.added, self.removed, self.modified: for v in l: if v[1] == file_id: return True for v in self.renamed: if v[2] == file_id: return True for v in self.kind_changed: if v[1] == file_id: return True return False def get_changes_as_text(self, show_ids=False, show_unchanged=False, short_status=False): import StringIO output = StringIO.StringIO() report_delta(output, self, short_status, show_ids, show_unchanged) return output.getvalue() def _compare_trees(old_tree, new_tree, want_unchanged, specific_files, include_root, extra_trees=None, require_versioned=False, want_unversioned=False): """Worker function that implements Tree.changes_from.""" delta = TreeDelta() # mutter('start compare_trees') for (file_id, path, content_change, versioned, parent_id, name, kind, executable) in new_tree.iter_changes(old_tree, want_unchanged, specific_files, extra_trees=extra_trees, require_versioned=require_versioned, want_unversioned=want_unversioned): if versioned == (False, False): delta.unversioned.append((path[1], None, kind[1])) continue if not include_root and (None, None) == parent_id: continue fully_present = tuple((versioned[x] and kind[x] is not None) for x in range(2)) if fully_present[0] != fully_present[1]: if fully_present[1] is True: delta.added.append((path[1], file_id, kind[1])) else: delta.removed.append((path[0], 
file_id, kind[0])) elif fully_present[0] is False: delta.missing.append((path[1], file_id, kind[1])) elif name[0] != name[1] or parent_id[0] != parent_id[1]: # If the name changes, or the parent_id changes, we have a rename # (if we move a parent, that doesn't count as a rename for the # file) delta.renamed.append((path[0], path[1], file_id, kind[1], content_change, (executable[0] != executable[1]))) elif kind[0] != kind[1]: delta.kind_changed.append((path[1], file_id, kind[0], kind[1])) elif content_change or executable[0] != executable[1]: delta.modified.append((path[1], file_id, kind[1], content_change, (executable[0] != executable[1]))) else: delta.unchanged.append((path[1], file_id, kind[1])) delta.removed.sort() delta.added.sort() delta.renamed.sort() delta.missing.sort() # TODO: jam 20060529 These lists shouldn't need to be sorted # since we added them in alphabetical order. delta.modified.sort() delta.unchanged.sort() return delta class _ChangeReporter(object): """Report changes between two trees""" def __init__(self, output=None, suppress_root_add=True, output_file=None, unversioned_filter=None, view_info=None, classify=True): """Constructor :param output: a function with the signature of trace.note, i.e. accepts a format and parameters. :param supress_root_add: If true, adding the root will be ignored (i.e. when a tree has just been initted) :param output_file: If supplied, a file-like object to write to. Only one of output and output_file may be supplied. :param unversioned_filter: A filter function to be called on unversioned files. This should return True to ignore a path. By default, no filtering takes place. :param view_info: A tuple of view_name,view_files if only items inside a view are to be reported on, or None for no view filtering. :param classify: Add special symbols to indicate file kind. """ if output_file is not None: if output is not None: raise BzrError('Cannot specify both output and output_file') def output(fmt, *args): output_file.write((fmt % args) + '\n') self.output = output if self.output is None: from bzrlib import trace self.output = trace.note self.suppress_root_add = suppress_root_add self.modified_map = {'kind changed': 'K', 'unchanged': ' ', 'created': 'N', 'modified': 'M', 'deleted': 'D', 'missing': '!', } self.versioned_map = {'added': '+', # versioned target 'unchanged': ' ', # versioned in both 'removed': '-', # versioned in source 'unversioned': '?', # versioned in neither } self.unversioned_filter = unversioned_filter if classify: self.kind_marker = osutils.kind_marker else: self.kind_marker = lambda kind: '' if view_info is None: self.view_name = None self.view_files = [] else: self.view_name = view_info[0] self.view_files = view_info[1] self.output("Operating on whole tree but only reporting on " "'%s' view." % (self.view_name,)) def report(self, file_id, paths, versioned, renamed, modified, exe_change, kind): """Report one change to a file :param file_id: The file_id of the file :param path: The old and new paths as generated by Tree.iter_changes. :param versioned: may be 'added', 'removed', 'unchanged', or 'unversioned. :param renamed: may be True or False :param modified: may be 'created', 'deleted', 'kind changed', 'modified' or 'unchanged'. :param exe_change: True if the execute bit has changed :param kind: A pair of file kinds, as generated by Tree.iter_changes. None indicates no file present. 
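        Illustrative sketch (hypothetical paths; the kind markers are empty
        for plain files, so none appear in the output)::

            import sys
            reporter = _ChangeReporter(output_file=sys.stdout)
            reporter.report('an-id', ('old.txt', 'new.txt'), 'unchanged',
                True, 'modified', False, ('file', 'file'))
            # prints a status line of the form:  RM  old.txt => new.txt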
""" if is_quiet(): return if paths[1] == '' and versioned == 'added' and self.suppress_root_add: return if self.view_files and not osutils.is_inside_any(self.view_files, paths[1]): return if versioned == 'unversioned': # skip ignored unversioned files if needed. if self.unversioned_filter is not None: if self.unversioned_filter(paths[1]): return # dont show a content change in the output. modified = 'unchanged' # we show both paths in the following situations: # the file versioning is unchanged AND # ( the path is different OR # the kind is different) if (versioned == 'unchanged' and (renamed or modified == 'kind changed')): if renamed: # on a rename, we show old and new old_path, path = paths else: # if it's not renamed, we're showing both for kind changes # so only show the new path old_path, path = paths[1], paths[1] # if the file is not missing in the source, we show its kind # when we show two paths. if kind[0] is not None: old_path += self.kind_marker(kind[0]) old_path += " => " elif versioned == 'removed': # not present in target old_path = "" path = paths[0] else: old_path = "" path = paths[1] if renamed: rename = "R" else: rename = self.versioned_map[versioned] # we show the old kind on the new path when the content is deleted. if modified == 'deleted': path += self.kind_marker(kind[0]) # otherwise we always show the current kind when there is one elif kind[1] is not None: path += self.kind_marker(kind[1]) if exe_change: exe = '*' else: exe = ' ' self.output("%s%s%s %s%s", rename, self.modified_map[modified], exe, old_path, path) def report_changes(change_iterator, reporter): """Report the changes from a change iterator. This is essentially a translation from low-level to medium-level changes. Further processing may be required to produce a human-readable output. Unfortunately, some tree-changing operations are very complex :change_iterator: an iterator or sequence of changes in the format generated by Tree.iter_changes :param reporter: The _ChangeReporter that will report the changes. """ versioned_change_map = { (True, True) : 'unchanged', (True, False) : 'removed', (False, True) : 'added', (False, False): 'unversioned', } for (file_id, path, content_change, versioned, parent_id, name, kind, executable) in change_iterator: exe_change = False # files are "renamed" if they are moved or if name changes, as long # as it had a value if None not in name and None not in parent_id and\ (name[0] != name[1] or parent_id[0] != parent_id[1]): renamed = True else: renamed = False if kind[0] != kind[1]: if kind[0] is None: modified = "created" elif kind[1] is None: modified = "deleted" else: modified = "kind changed" else: if content_change: modified = "modified" elif kind[0] is None: modified = "missing" else: modified = "unchanged" if kind[1] == "file": exe_change = (executable[0] != executable[1]) versioned_change = versioned_change_map[versioned] reporter.report(file_id, path, versioned_change, renamed, modified, exe_change, kind) def report_delta(to_file, delta, short_status=False, show_ids=False, show_unchanged=False, indent='', filter=None, classify=True): """Output this delta in status-like form to to_file. :param to_file: A file-like object where the output is displayed. :param delta: A TreeDelta containing the changes to be displayed :param short_status: Single-line status if True. :param show_ids: Output the file ids if True. :param show_unchanged: Output the unchanged files if True. :param indent: Added at the beginning of all output lines (for merged revisions). 
:param filter: A callable receiving a path and a file id and returning True if the path should be displayed. :param classify: Add special symbols to indicate file kind. """ def decorate_path(path, kind, meta_modified=None): if not classify: return path if kind == 'directory': path += '/' elif kind == 'symlink': path += '@' if meta_modified: path += '*' return path def show_more_renamed(item): (oldpath, file_id, kind, text_modified, meta_modified, newpath) = item dec_new_path = decorate_path(newpath, kind, meta_modified) to_file.write(' => %s' % dec_new_path) if text_modified or meta_modified: extra_modified.append((newpath, file_id, kind, text_modified, meta_modified)) def show_more_kind_changed(item): (path, file_id, old_kind, new_kind) = item to_file.write(' (%s => %s)' % (old_kind, new_kind)) def show_path(path, file_id, kind, meta_modified, default_format, with_file_id_format): dec_path = decorate_path(path, kind, meta_modified) if show_ids: to_file.write(with_file_id_format % dec_path) else: to_file.write(default_format % dec_path) def show_list(files, long_status_name, short_status_letter, default_format='%s', with_file_id_format='%-30s', show_more=None): if files: header_shown = False if short_status: prefix = short_status_letter else: prefix = '' prefix = indent + prefix + ' ' for item in files: path, file_id, kind = item[:3] if (filter is not None and not filter(path, file_id)): continue if not header_shown and not short_status: to_file.write(indent + long_status_name + ':\n') header_shown = True meta_modified = None if len(item) == 5: meta_modified = item[4] to_file.write(prefix) show_path(path, file_id, kind, meta_modified, default_format, with_file_id_format) if show_more is not None: show_more(item) if show_ids: to_file.write(' %s' % file_id) to_file.write('\n') show_list(delta.removed, 'removed', 'D') show_list(delta.added, 'added', 'A') show_list(delta.missing, 'missing', '!') extra_modified = [] # Reorder delta.renamed tuples so that all lists share the same # order for their 3 first fields and that they also begin like # the delta.modified tuples renamed = [(p, i, k, tm, mm, np) for p, np, i, k, tm, mm in delta.renamed] show_list(renamed, 'renamed', 'R', with_file_id_format='%s', show_more=show_more_renamed) show_list(delta.kind_changed, 'kind changed', 'K', with_file_id_format='%s', show_more=show_more_kind_changed) show_list(delta.modified + extra_modified, 'modified', 'M') if show_unchanged: show_list(delta.unchanged, 'unchanged', 'S') show_list(delta.unversioned, 'unknown', ' ') bzr-2.7.0/bzrlib/diff-delta.c0000644000000000000000000012725711641367135014117 0ustar 00000000000000/* * diff-delta.c: generate a delta between two buffers * * This code was greatly inspired by parts of LibXDiff from Davide Libenzi * http://www.xmailserver.org/xdiff-lib.html * * Rewritten for GIT by Nicolas Pitre , (C) 2005-2007 * Adapted for Bazaar by John Arbash Meinel (C) 2009 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * NB: The version in GIT is 'version 2 of the Licence only', however Nicolas * has granted permission for use under 'version 2 or later' in private email * to Robert Collins and Karl Fogel on the 6th April 2009. 
*/ #include #include "delta.h" #include #include #include /* maximum hash entry list for the same hash bucket */ #define HASH_LIMIT 64 #define RABIN_SHIFT 23 #define RABIN_WINDOW 16 /* The hash map is sized to put 4 entries per bucket, this gives us ~even room * for more data. Tweaking this number above 4 doesn't seem to help much, * anyway. */ #define EXTRA_NULLS 4 static const unsigned int T[256] = { 0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344, 0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259, 0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85, 0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2, 0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a, 0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db, 0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753, 0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964, 0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8, 0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5, 0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81, 0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6, 0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e, 0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77, 0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff, 0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8, 0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc, 0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1, 0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d, 0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a, 0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2, 0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02, 0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a, 0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd, 0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61, 0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c, 0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08, 0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f, 0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7, 0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe, 0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76, 0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141, 0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65, 0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78, 0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4, 0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93, 0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b, 0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa, 0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872, 0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645, 0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99, 0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84, 0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811 }; static const unsigned int U[256] = { 0x00000000, 0x7eb5200d, 0x5633f4cb, 
0x2886d4c6, 0x073e5d47, 0x798b7d4a, 0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48, 0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511, 0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d, 0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8, 0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe, 0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb, 0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937, 0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e, 0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c, 0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d, 0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1, 0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4, 0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa, 0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef, 0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263, 0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302, 0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000, 0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59, 0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5, 0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90, 0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7, 0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2, 0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e, 0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467, 0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765, 0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604, 0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88, 0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd, 0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3, 0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996, 0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a, 0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b, 0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609, 0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50, 0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc, 0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99, 0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf, 0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa, 0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176, 0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f, 0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d, 0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a }; struct index_entry { const unsigned char *ptr; const struct source_info *src; unsigned int val; }; struct index_entry_linked_list { struct index_entry *p_entry; struct index_entry_linked_list *next; }; struct unpacked_index_entry { struct index_entry entry; struct unpacked_index_entry *next; }; struct delta_index { unsigned long memsize; /* Total bytes pointed to by this index */ const struct source_info *last_src; /* Information about the referenced source */ unsigned int hash_mask; /* val & hash_mask gives 
the hash index for a given entry */ unsigned int num_entries; /* The total number of entries in this index */ struct index_entry *last_entry; /* Pointer to the last valid entry */ struct index_entry *hash[]; }; static unsigned int limit_hash_buckets(struct unpacked_index_entry **hash, unsigned int *hash_count, unsigned int hsize, unsigned int entries) { struct unpacked_index_entry *entry; unsigned int i; /* * Determine a limit on the number of entries in the same hash * bucket. This guards us against pathological data sets causing * really bad hash distribution with most entries in the same hash * bucket that would bring us to O(m*n) computing costs (m and n * corresponding to reference and target buffer sizes). * * Make sure none of the hash buckets has more entries than * we're willing to test. Otherwise we cull the entry list * uniformly to still preserve a good repartition across * the reference buffer. */ for (i = 0; i < hsize; i++) { int acc; if (hash_count[i] <= HASH_LIMIT) continue; /* We leave exactly HASH_LIMIT entries in the bucket */ entries -= hash_count[i] - HASH_LIMIT; entry = hash[i]; acc = 0; /* * Assume that this loop is gone through exactly * HASH_LIMIT times and is entered and left with * acc==0. So the first statement in the loop * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT * to the accumulator, and the inner loop consequently * is run (hash_count[i]-HASH_LIMIT) times, removing * one element from the list each time. Since acc * balances out to 0 at the final run, the inner loop * body can't be left with entry==NULL. So we indeed * encounter entry==NULL in the outer loop only. */ do { acc += hash_count[i] - HASH_LIMIT; if (acc > 0) { struct unpacked_index_entry *keep = entry; do { entry = entry->next; acc -= HASH_LIMIT; } while (acc > 0); keep->next = entry->next; } entry = entry->next; } while (entry); } return entries; } static struct delta_index * pack_delta_index(struct unpacked_index_entry **hash, unsigned int hsize, unsigned int num_entries, struct delta_index *old_index) { unsigned int i, j, hmask, memsize, fit_in_old, copied_count; struct unpacked_index_entry *entry; struct delta_index *index; struct index_entry *packed_entry, **packed_hash, *old_entry, *copy_from; struct index_entry null_entry = {0}; void *mem; hmask = hsize - 1; // if (old_index) { // fprintf(stderr, "Packing %d entries into %d for total of %d entries" // " %x => %x\n", // num_entries - old_index->num_entries, // old_index->num_entries, num_entries, // old_index->hash_mask, hmask); // } else { // fprintf(stderr, "Packing %d entries into a new index\n", // num_entries); // } /* First, see if we can squeeze the new items into the existing structure. */ fit_in_old = 0; copied_count = 0; if (old_index && old_index->hash_mask == hmask) { fit_in_old = 1; for (i = 0; i < hsize; ++i) { packed_entry = NULL; for (entry = hash[i]; entry; entry = entry->next) { if (packed_entry == NULL) { /* Find the last open spot */ packed_entry = old_index->hash[i + 1]; --packed_entry; while (packed_entry >= old_index->hash[i] && packed_entry->ptr == NULL) { --packed_entry; } ++packed_entry; } if (packed_entry >= old_index->hash[i+1] || packed_entry->ptr != NULL) { /* There are no free spots here :( */ fit_in_old = 0; break; } /* We found an empty spot to put this entry * Copy it over, and remove it from the linked list, just in * case we end up running out of room later. 
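* (The empty spots searched here are the EXTRA_NULLS spare entries that were
* appended to every hash bucket when old_index was packed -- see the padding
* loop below -- left there precisely so that new entries can be dropped into
* an existing index without reallocating it.)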
*/ *packed_entry++ = entry->entry; assert(entry == hash[i]); hash[i] = entry->next; copied_count += 1; old_index->num_entries++; } if (!fit_in_old) { break; } } } if (old_index) { if (fit_in_old) { // fprintf(stderr, "Fit all %d entries into old index\n", // copied_count); /* * No need to allocate a new buffer, but return old_index ptr so * callers can distinguish this from an OOM failure. */ return old_index; } else { // fprintf(stderr, "Fit only %d entries into old index," // " reallocating\n", copied_count); } } /* * Now create the packed index in array form * rather than linked lists. * Leave a 2-entry gap for inserting more entries between the groups */ memsize = sizeof(*index) + sizeof(*packed_hash) * (hsize+1) + sizeof(*packed_entry) * (num_entries + hsize * EXTRA_NULLS); mem = malloc(memsize); if (!mem) { return NULL; } index = mem; index->memsize = memsize; index->hash_mask = hmask; index->num_entries = num_entries; if (old_index) { if (hmask < old_index->hash_mask) { fprintf(stderr, "hash mask was shrunk %x => %x\n", old_index->hash_mask, hmask); } assert(hmask >= old_index->hash_mask); } mem = index->hash; packed_hash = mem; mem = packed_hash + (hsize+1); packed_entry = mem; for (i = 0; i < hsize; i++) { /* * Coalesce all entries belonging to one linked list * into consecutive array entries. */ packed_hash[i] = packed_entry; /* Old comes earlier as a source, so it always comes first in a given * hash bucket. */ if (old_index) { /* Could we optimize this to use memcpy when hmask == * old_index->hash_mask? Would it make any real difference? */ j = i & old_index->hash_mask; copy_from = old_index->hash[j]; for (old_entry = old_index->hash[j]; old_entry < old_index->hash[j + 1] && old_entry->ptr != NULL; old_entry++) { if ((old_entry->val & hmask) == i) { *packed_entry++ = *old_entry; } } } for (entry = hash[i]; entry; entry = entry->next) { *packed_entry++ = entry->entry; } /* TODO: At this point packed_entry - packed_hash[i] is the number of * records that we have inserted into this hash bucket. * We should *really* consider doing some limiting along the * lines of limit_hash_buckets() to avoid pathological behavior. */ /* Now add extra 'NULL' entries that we can use for future expansion. */ for (j = 0; j < EXTRA_NULLS; ++j ) { *packed_entry++ = null_entry; } } /* Sentinel value to indicate the length of the last hash bucket */ packed_hash[hsize] = packed_entry; if (packed_entry - (struct index_entry *)mem != num_entries + hsize*EXTRA_NULLS) { fprintf(stderr, "We expected %d entries, but created %d\n", num_entries + hsize*EXTRA_NULLS, (int)(packed_entry - (struct index_entry*)mem)); } assert(packed_entry - (struct index_entry *)mem == num_entries + hsize*EXTRA_NULLS); index->last_entry = (packed_entry - 1); return index; } delta_result create_delta_index(const struct source_info *src, struct delta_index *old, struct delta_index **fresh, int max_bytes_to_index) { unsigned int i, hsize, hmask, num_entries, prev_val, *hash_count; unsigned int total_num_entries, stride, max_entries; const unsigned char *data, *buffer; struct delta_index *index; struct unpacked_index_entry *entry, **hash; void *mem; unsigned long memsize; if (!src->buf || !src->size) return DELTA_SOURCE_EMPTY; buffer = src->buf; /* Determine index hash size. Note that indexing skips the first byte so we subtract 1 to get the edge cases right. 
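* For illustration (arbitrary numbers, assuming no existing index and no
* max_bytes_to_index cap): a 65537-byte source gives num_entries =
* (65537 - 1) / 16 = 4096 candidate windows, so hsize starts at 4096 / 4 = 1024
* and is then rounded to a power of two no smaller than that, i.e. 1024,
* giving hash_mask 0x3ff. Each index entry stores the Rabin hash (computed with
* the T[] table above) of the RABIN_WINDOW bytes ending at entry->ptr;
* create_delta() later maintains the same hash over the target with a rolling
* update (using U[]), so both sides hash identically sized windows.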
*/ stride = RABIN_WINDOW; num_entries = (src->size - 1) / RABIN_WINDOW; if (max_bytes_to_index > 0) { max_entries = (unsigned int) (max_bytes_to_index / RABIN_WINDOW); if (num_entries > max_entries) { /* Limit the max number of matching entries. This reduces the 'best' * possible match, but means we don't consume all of ram. */ num_entries = max_entries; stride = (src->size - 1) / num_entries; } } if (old != NULL) total_num_entries = num_entries + old->num_entries; else total_num_entries = num_entries; hsize = total_num_entries / 4; for (i = 4; (1u << i) < hsize && i < 31; i++); hsize = 1 << i; hmask = hsize - 1; if (old && old->hash_mask > hmask) { hmask = old->hash_mask; hsize = hmask + 1; } /* allocate lookup index */ memsize = sizeof(*hash) * hsize + sizeof(*entry) * total_num_entries; mem = malloc(memsize); if (!mem) return DELTA_OUT_OF_MEMORY; hash = mem; mem = hash + hsize; entry = mem; memset(hash, 0, hsize * sizeof(*hash)); /* allocate an array to count hash num_entries */ hash_count = calloc(hsize, sizeof(*hash_count)); if (!hash_count) { free(hash); return DELTA_OUT_OF_MEMORY; } /* then populate the index for the new data */ prev_val = ~0; for (data = buffer + num_entries * stride - RABIN_WINDOW; data >= buffer; data -= stride) { unsigned int val = 0; for (i = 1; i <= RABIN_WINDOW; i++) val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT]; if (val == prev_val) { /* keep the lowest of consecutive identical blocks */ entry[-1].entry.ptr = data + RABIN_WINDOW; --num_entries; --total_num_entries; } else { prev_val = val; i = val & hmask; entry->entry.ptr = data + RABIN_WINDOW; entry->entry.val = val; entry->entry.src = src; entry->next = hash[i]; hash[i] = entry++; hash_count[i]++; } } /* TODO: It would be nice to limit_hash_buckets at a better time. */ total_num_entries = limit_hash_buckets(hash, hash_count, hsize, total_num_entries); free(hash_count); index = pack_delta_index(hash, hsize, total_num_entries, old); free(hash); /* pack_delta_index only returns NULL on malloc failure */ if (!index) { return DELTA_OUT_OF_MEMORY; } index->last_src = src; *fresh = index; return DELTA_OK; } /* Take some entries, and put them into a custom hash. * @param entries A list of entries, sorted by position in file * @param num_entries Length of entries * @param out_hsize The maximum size of the hash, the final size will be * returned here */ struct index_entry_linked_list ** _put_entries_into_hash(struct index_entry *entries, unsigned int num_entries, unsigned int hsize) { unsigned int hash_offset, hmask, memsize; struct index_entry *entry; struct index_entry_linked_list *out_entry, **hash; void *mem; hmask = hsize - 1; memsize = sizeof(*hash) * hsize + sizeof(*out_entry) * num_entries; mem = malloc(memsize); if (!mem) return NULL; hash = mem; mem = hash + hsize; out_entry = mem; memset(hash, 0, sizeof(*hash)*(hsize+1)); /* We know that entries are in the order we want in the output, but they * aren't "grouped" by hash bucket yet. */ for (entry = entries + num_entries - 1; entry >= entries; --entry) { hash_offset = entry->val & hmask; out_entry->p_entry = entry; out_entry->next = hash[hash_offset]; /* TODO: Remove entries that have identical vals, or at least filter * the map a little bit. 
* if (hash[i] != NULL) { * } */ hash[hash_offset] = out_entry; ++out_entry; } return hash; } struct delta_index * create_index_from_old_and_new_entries(const struct delta_index *old_index, struct index_entry *entries, unsigned int num_entries) { unsigned int i, j, hsize, hmask, total_num_entries; struct delta_index *index; struct index_entry *entry, *packed_entry, **packed_hash; struct index_entry null_entry = {0}; void *mem; unsigned long memsize; struct index_entry_linked_list *unpacked_entry, **mini_hash; /* Determine index hash size. Note that indexing skips the first byte to allow for optimizing the Rabin's polynomial initialization in create_delta(). */ total_num_entries = num_entries + old_index->num_entries; hsize = total_num_entries / 4; for (i = 4; (1u << i) < hsize && i < 31; i++); hsize = 1 << i; if (hsize < old_index->hash_mask) { /* For some reason, there was a code path that would actually *shrink* * the hash size. This screws with some later code, and in general, I * think it better to make the hash bigger, rather than smaller. So * we'll just force the size here. * Possibly done by create_delta_index running into a * limit_hash_buckets call, that ended up transitioning across a * power-of-2. The cause isn't 100% clear, though. */ hsize = old_index->hash_mask + 1; } hmask = hsize - 1; // fprintf(stderr, "resizing index to insert %d entries into array" // " with %d entries: %x => %x\n", // num_entries, old_index->num_entries, old_index->hash_mask, hmask); memsize = sizeof(*index) + sizeof(*packed_hash) * (hsize+1) + sizeof(*packed_entry) * (total_num_entries + hsize*EXTRA_NULLS); mem = malloc(memsize); if (!mem) { return NULL; } index = mem; index->memsize = memsize; index->hash_mask = hmask; index->num_entries = total_num_entries; index->last_src = old_index->last_src; mem = index->hash; packed_hash = mem; mem = packed_hash + (hsize+1); packed_entry = mem; mini_hash = _put_entries_into_hash(entries, num_entries, hsize); if (mini_hash == NULL) { free(index); return NULL; } for (i = 0; i < hsize; i++) { /* * Coalesce all entries belonging in one hash bucket * into consecutive array entries. * The entries in old_index all come before 'entries'. */ packed_hash[i] = packed_entry; /* Copy any of the old entries across */ /* Would we rather use memcpy? */ if (hmask == old_index->hash_mask) { for (entry = old_index->hash[i]; entry < old_index->hash[i+1] && entry->ptr != NULL; ++entry) { assert((entry->val & hmask) == i); *packed_entry++ = *entry; } } else { /* If we resized the index from this action, all of the old values * will be found in the previous location, but they will end up * spread across the new locations. */ j = i & old_index->hash_mask; for (entry = old_index->hash[j]; entry < old_index->hash[j+1] && entry->ptr != NULL; ++entry) { assert((entry->val & old_index->hash_mask) == j); if ((entry->val & hmask) == i) { /* Any entries not picked up here will be picked up on the * next pass. */ *packed_entry++ = *entry; } } } /* Now see if we need to insert any of the new entries. * Note that loop ends up O(hsize*num_entries), so we expect that * num_entries is always small. * We also help a little bit by collapsing the entry range when the * endpoints are inserted. However, an alternative would be to build a * quick hash lookup for just the new entries. 
* Testing shows that this list can easily get up to about 100 * entries, the tradeoff is a malloc, 1 pass over the entries, copying * them into a sorted buffer, and a free() when done, */ for (unpacked_entry = mini_hash[i]; unpacked_entry; unpacked_entry = unpacked_entry->next) { assert((unpacked_entry->p_entry->val & hmask) == i); *packed_entry++ = *(unpacked_entry->p_entry); } /* Now insert some extra nulls */ for (j = 0; j < EXTRA_NULLS; ++j) { *packed_entry++ = null_entry; } } free(mini_hash); /* Sentinel value to indicate the length of the last hash bucket */ packed_hash[hsize] = packed_entry; if ((packed_entry - (struct index_entry *)mem) != (total_num_entries + hsize*EXTRA_NULLS)) { fprintf(stderr, "We expected %d entries, but created %d\n", total_num_entries + hsize*EXTRA_NULLS, (int)(packed_entry - (struct index_entry*)mem)); fflush(stderr); } assert((packed_entry - (struct index_entry *)mem) == (total_num_entries + hsize * EXTRA_NULLS)); index->last_entry = (packed_entry - 1); return index; } void get_text(char buff[128], const unsigned char *ptr) { unsigned int i; const unsigned char *start; unsigned char cmd; start = (ptr-RABIN_WINDOW-1); cmd = *(start); if (cmd < 0x80) {// This is likely to be an insert instruction if (cmd < RABIN_WINDOW) { cmd = RABIN_WINDOW; } } else { /* This was either a copy [should never be] or it * was a longer insert so the insert start happened at 16 more * bytes back. */ cmd = RABIN_WINDOW + 1; } if (cmd > 60) { cmd = 60; /* Be friendly to 80char terms */ } /* Copy the 1 byte command, and 4 bytes after the insert */ cmd += 5; memcpy(buff, start, cmd); buff[cmd] = 0; for (i = 0; i < cmd; ++i) { if (buff[i] == '\n') { buff[i] = 'N'; } else if (buff[i] == '\t') { buff[i] = 'T'; } } } delta_result create_delta_index_from_delta(const struct source_info *src, struct delta_index *old_index, struct delta_index **fresh) { unsigned int i, num_entries, max_num_entries, prev_val, num_inserted; unsigned int hash_offset; const unsigned char *data, *buffer, *top; unsigned char cmd; struct delta_index *new_index; struct index_entry *entry, *entries; if (!old_index) return DELTA_INDEX_NEEDED; if (!src->buf || !src->size) return DELTA_SOURCE_EMPTY; buffer = src->buf; top = buffer + src->size; /* Determine index hash size. Note that indexing skips the first byte to allow for optimizing the Rabin's polynomial initialization in create_delta(). This computes the maximum number of entries that could be held. The actual number will be recomputed during processing. */ max_num_entries = (src->size - 1) / RABIN_WINDOW; if (!max_num_entries) { *fresh = old_index; return DELTA_OK; } /* allocate an array to hold whatever entries we find */ entries = malloc(sizeof(*entry) * max_num_entries); if (!entries) /* malloc failure */ return DELTA_OUT_OF_MEMORY; /* then populate the index for the new data */ prev_val = ~0; data = buffer; /* target size */ /* get_delta_hdr_size doesn't mutate the content, just moves the * start-of-data pointer, so it is safe to do the cast. 
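* The header skipped here is the target-size varint written by create_delta()
* below: least-significant seven bits first, high bit set on every byte except
* the last. After it, each command byte is either a copy (high bit set; flag
* bits 0x01..0x08 say which offset bytes follow, 0x10..0x40 which size bytes,
* 0x40 being accepted by this parser though create_delta() never emits it) or
* an insert of cmd (1..127) literal bytes; cmd == 0 is reserved, as the loop
* below checks.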
*/ get_delta_hdr_size((unsigned char**)&data, top); entry = entries; /* start at the first slot */ num_entries = 0; /* calculate the real number of entries */ while (data < top) { cmd = *data++; if (cmd & 0x80) { /* Copy instruction, skip it */ if (cmd & 0x01) data++; if (cmd & 0x02) data++; if (cmd & 0x04) data++; if (cmd & 0x08) data++; if (cmd & 0x10) data++; if (cmd & 0x20) data++; if (cmd & 0x40) data++; } else if (cmd) { /* Insert instruction, we want to index these bytes */ if (data + cmd > top) { /* Invalid insert, not enough bytes in the delta */ break; } /* The create_delta code requires a match at least 4 characters * (including only the last char of the RABIN_WINDOW) before it * will consider it something worth copying rather than inserting. * So we don't want to index anything that we know won't ever be a * match. */ for (; cmd > RABIN_WINDOW + 3; cmd -= RABIN_WINDOW, data += RABIN_WINDOW) { unsigned int val = 0; for (i = 1; i <= RABIN_WINDOW; i++) val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT]; if (val != prev_val) { /* Only keep the first of consecutive data */ prev_val = val; num_entries++; entry->ptr = data + RABIN_WINDOW; entry->val = val; entry->src = src; entry++; if (num_entries > max_num_entries) { /* We ran out of entry room, something is really wrong */ break; } } } /* Move the data pointer by whatever remainder is left */ data += cmd; } else { /* * cmd == 0 is reserved for future encoding * extensions. In the mean time we must fail when * encountering them (might be data corruption). */ break; } } if (data != top) { /* The source_info data passed was corrupted or otherwise invalid */ free(entries); return DELTA_SOURCE_BAD; } if (num_entries == 0) { /** Nothing to index **/ free(entries); *fresh = old_index; return DELTA_OK; } old_index->last_src = src; /* See if we can fill in these values into the holes in the array */ entry = entries; num_inserted = 0; for (; num_entries > 0; --num_entries, ++entry) { struct index_entry *next_bucket_entry, *cur_entry, *bucket_first_entry; hash_offset = (entry->val & old_index->hash_mask); /* The basic structure is a hash => packed_entries that fit in that * hash bucket. Things are structured such that the hash-pointers are * strictly ordered. So we start by pointing to the next pointer, and * walk back until we stop getting NULL targets, and then go back * forward. If there are no NULL targets, then we know because * entry->ptr will not be NULL. */ // The start of the next bucket, this may point past the end of the // entry table if hash_offset is the last bucket. next_bucket_entry = old_index->hash[hash_offset + 1]; // First entry in this bucket bucket_first_entry = old_index->hash[hash_offset]; cur_entry = next_bucket_entry - 1; while (cur_entry->ptr == NULL && cur_entry >= bucket_first_entry) { cur_entry--; } // cur_entry now either points at the first NULL, or it points to // next_bucket_entry if there were no blank spots. 
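// (At this point cur_entry is still on the last occupied slot, or just before
// the bucket when it is empty; the increment below is what moves it onto that
// first free slot.)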
cur_entry++; if (cur_entry >= next_bucket_entry || cur_entry->ptr != NULL) { /* There is no room for this entry, we have to resize */ // char buff[128]; // get_text(buff, entry->ptr); // fprintf(stderr, "Failed to find an opening @%x for %8x:\n '%s'\n", // hash_offset, entry->val, buff); // for (old_entry = old_index->hash[hash_offset]; // old_entry < old_index->hash[hash_offset+1]; // ++old_entry) { // get_text(buff, old_entry->ptr); // fprintf(stderr, " [%2d] %8x %8x: '%s'\n", // (int)(old_entry - old_index->hash[hash_offset]), // old_entry->val, old_entry->ptr, buff); // } break; } num_inserted++; *cur_entry = *entry; /* For entries which we *do* manage to insert into old_index, we don't * want them double copied into the final output. */ old_index->num_entries++; } if (num_entries > 0) { /* We couldn't fit the new entries into the old index, so allocate a * new one, and fill it with stuff. */ // fprintf(stderr, "inserted %d before resize\n", num_inserted); new_index = create_index_from_old_and_new_entries(old_index, entry, num_entries); } else { new_index = old_index; // fprintf(stderr, "inserted %d without resizing\n", num_inserted); } free(entries); /* create_index_from_old_and_new_entries returns NULL on malloc failure */ if (!new_index) return DELTA_OUT_OF_MEMORY; *fresh = new_index; return DELTA_OK; } void free_delta_index(struct delta_index *index) { free(index); } unsigned long sizeof_delta_index(struct delta_index *index) { if (index) return index->memsize; else return 0; } /* * The maximum size for any opcode sequence, including the initial header * plus Rabin window plus biggest copy. */ #define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7) delta_result create_delta(const struct delta_index *index, const void *trg_buf, unsigned long trg_size, unsigned long *delta_size, unsigned long max_size, void **delta_data) { unsigned int i, outpos, outsize, moff, val; int msize; const struct source_info *msource; int inscnt; const unsigned char *ref_data, *ref_top, *data, *top; unsigned char *out; unsigned long source_size; if (!trg_buf || !trg_size) return DELTA_BUFFER_EMPTY; if (index == NULL) return DELTA_INDEX_NEEDED; outpos = 0; outsize = 8192; if (max_size && outsize >= max_size) outsize = max_size + MAX_OP_SIZE + 1; out = malloc(outsize); if (!out) return DELTA_OUT_OF_MEMORY; source_size = index->last_src->size + index->last_src->agg_offset; /* store target buffer size */ i = trg_size; while (i >= 0x80) { out[outpos++] = i | 0x80; i >>= 7; } out[outpos++] = i; data = trg_buf; top = (const unsigned char *) trg_buf + trg_size; /* Start the matching by filling out with a simple 'insert' instruction, of * the first RABIN_WINDOW bytes of the input. */ outpos++; /* leave a byte for the insert command */ val = 0; for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) { out[outpos++] = *data; val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT]; } /* we are now setup with an insert of 'i' bytes and val contains the RABIN * hash for those bytes, and data points to the RABIN_WINDOW+1 byte of * input. */ inscnt = i; moff = 0; msize = 0; msource = NULL; while (data < top) { if (msize < 4096) { /* we don't have a 'worthy enough' match yet, so let's look for * one. */ struct index_entry *entry; /* Shift the window by one byte. */ val ^= U[data[-RABIN_WINDOW]]; val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT]; i = val & index->hash_mask; /* TODO: When using multiple indexes like this, the hash tables * mapping val => index_entry become less efficient. 
* You end up getting a lot more collisions in the hash, * which doesn't actually lead to a entry->val match. */ for (entry = index->hash[i]; entry < index->hash[i+1] && entry->src != NULL; entry++) { const unsigned char *ref; const unsigned char *src; int ref_size; if (entry->val != val) continue; ref = entry->ptr; src = data; ref_data = entry->src->buf; ref_top = ref_data + entry->src->size; ref_size = ref_top - ref; /* ref_size is the longest possible match that we could make * here. If ref_size <= msize, then we know that we cannot * match more bytes with this location that we have already * matched. */ if (ref_size > (top - src)) ref_size = top - src; if (ref_size <= msize) break; /* See how many bytes actually match at this location. */ while (ref_size-- && *src++ == *ref) ref++; if (msize < (ref - entry->ptr)) { /* this is our best match so far */ msize = ref - entry->ptr; msource = entry->src; moff = entry->ptr - ref_data; if (msize >= 4096) /* good enough */ break; } } } if (msize < 4) { /* The best match right now is less than 4 bytes long. So just add * the current byte to the insert instruction. Increment the insert * counter, and copy the byte of data into the output buffer. */ if (!inscnt) outpos++; out[outpos++] = *data++; inscnt++; if (inscnt == 0x7f) { /* We have a max length insert instruction, finalize it in the * output. */ out[outpos - inscnt - 1] = inscnt; inscnt = 0; } msize = 0; } else { unsigned int left; unsigned char *op; if (inscnt) { ref_data = msource->buf; while (moff && ref_data[moff-1] == data[-1]) { /* we can match one byte back */ msize++; moff--; data--; outpos--; if (--inscnt) continue; outpos--; /* remove count slot */ inscnt--; /* make it -1 */ break; } out[outpos - inscnt - 1] = inscnt; inscnt = 0; } /* A copy op is currently limited to 64KB (pack v2) */ left = (msize < 0x10000) ? 0 : (msize - 0x10000); msize -= left; op = out + outpos++; i = 0x80; /* moff is the offset in the local structure, for encoding, we need * to push it into the global offset */ assert(moff < msource->size); moff += msource->agg_offset; assert(moff + msize <= source_size); if (moff & 0x000000ff) out[outpos++] = moff >> 0, i |= 0x01; if (moff & 0x0000ff00) out[outpos++] = moff >> 8, i |= 0x02; if (moff & 0x00ff0000) out[outpos++] = moff >> 16, i |= 0x04; if (moff & 0xff000000) out[outpos++] = moff >> 24, i |= 0x08; /* Put it back into local coordinates, in case we have multiple * copies in a row. 
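* For example (illustrative numbers): with agg_offset 0x10000 and a local match
* offset of 0x1234, the bytes emitted above encode the global offset 0x11234;
* subtracting agg_offset below restores 0x1234 so the moff += msize bookkeeping
* that follows stays in local coordinates.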
*/ moff -= msource->agg_offset; if (msize & 0x00ff) out[outpos++] = msize >> 0, i |= 0x10; if (msize & 0xff00) out[outpos++] = msize >> 8, i |= 0x20; *op = i; data += msize; moff += msize; msize = left; if (msize < 4096) { int j; val = 0; for (j = -RABIN_WINDOW; j < 0; j++) val = ((val << 8) | data[j]) ^ T[val >> RABIN_SHIFT]; } } if (outpos >= outsize - MAX_OP_SIZE) { void *tmp = out; outsize = outsize * 3 / 2; if (max_size && outsize >= max_size) outsize = max_size + MAX_OP_SIZE + 1; if (max_size && outpos > max_size) break; out = realloc(out, outsize); if (!out) { free(tmp); return DELTA_OUT_OF_MEMORY; } } } if (inscnt) out[outpos - inscnt - 1] = inscnt; if (max_size && outpos > max_size) { free(out); return DELTA_SIZE_TOO_BIG; } *delta_size = outpos; *delta_data = out; return DELTA_OK; } int get_entry_summary(const struct delta_index *index, int pos, unsigned int *text_offset, unsigned int *hash_val) { int hsize; const struct index_entry *entry; const struct index_entry *start_of_entries; unsigned int offset; if (pos < 0 || text_offset == NULL || hash_val == NULL || index == NULL) { return 0; } hsize = index->hash_mask + 1; start_of_entries = (struct index_entry *)(((struct index_entry **)index->hash) + (hsize + 1)); entry = start_of_entries + pos; if (entry > index->last_entry) { return 0; } if (entry->ptr == NULL) { *text_offset = 0; *hash_val = 0; } else { offset = entry->src->agg_offset; offset += (entry->ptr - ((unsigned char *)entry->src->buf)); *text_offset = offset; *hash_val = entry->val; } return 1; } int get_hash_offset(const struct delta_index *index, int pos, unsigned int *entry_offset) { int hsize; const struct index_entry *entry; const struct index_entry *start_of_entries; if (pos < 0 || index == NULL || entry_offset == NULL) { return 0; } hsize = index->hash_mask + 1; start_of_entries = (struct index_entry *)(((struct index_entry **)index->hash) + (hsize + 1)); if (pos >= hsize) { return 0; } entry = index->hash[pos]; if (entry == NULL) { *entry_offset = -1; } else { *entry_offset = (entry - start_of_entries); } return 1; } unsigned int rabin_hash(const unsigned char *data) { int i; unsigned int val = 0; for (i = 0; i < RABIN_WINDOW; i++) val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT]; return val; } /* vim: et ts=4 sw=4 sts=4 */ bzr-2.7.0/bzrlib/diff.py0000644000000000000000000012042612414505677013231 0ustar 00000000000000# Copyright (C) 2005-2014 Canonical Ltd. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import import difflib import os import re import string import sys from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ import errno import subprocess import tempfile from bzrlib import ( cleanup, cmdline, controldir, errors, osutils, patiencediff, textfile, timestamp, views, ) from bzrlib.workingtree import WorkingTree from bzrlib.i18n import gettext """) from bzrlib.registry import ( Registry, ) from bzrlib.trace import mutter, note, warning DEFAULT_CONTEXT_AMOUNT = 3 class AtTemplate(string.Template): """Templating class that uses @ instead of $.""" delimiter = '@' # TODO: Rather than building a changeset object, we should probably # invoke callbacks on an object. That object can either accumulate a # list, write them out directly, etc etc. class _PrematchedMatcher(difflib.SequenceMatcher): """Allow SequenceMatcher operations to use predetermined blocks""" def __init__(self, matching_blocks): difflib.SequenceMatcher(self, None, None) self.matching_blocks = matching_blocks self.opcodes = None def internal_diff(old_filename, oldlines, new_filename, newlines, to_file, allow_binary=False, sequence_matcher=None, path_encoding='utf8', context_lines=DEFAULT_CONTEXT_AMOUNT): # FIXME: difflib is wrong if there is no trailing newline. # The syntax used by patch seems to be "\ No newline at # end of file" following the last diff line from that # file. This is not trivial to insert into the # unified_diff output and it might be better to just fix # or replace that function. # In the meantime we at least make sure the patch isn't # mangled. # Special workaround for Python2.3, where difflib fails if # both sequences are empty. if not oldlines and not newlines: return if allow_binary is False: textfile.check_text_lines(oldlines) textfile.check_text_lines(newlines) if sequence_matcher is None: sequence_matcher = patiencediff.PatienceSequenceMatcher ud = patiencediff.unified_diff(oldlines, newlines, fromfile=old_filename.encode(path_encoding, 'replace'), tofile=new_filename.encode(path_encoding, 'replace'), n=context_lines, sequencematcher=sequence_matcher) ud = list(ud) if len(ud) == 0: # Identical contents, nothing to do return # work-around for difflib being too smart for its own good # if /dev/null is "1,0", patch won't recognize it as /dev/null if not oldlines: ud[2] = ud[2].replace('-1,0', '-0,0') elif not newlines: ud[2] = ud[2].replace('+1,0', '+0,0') for line in ud: to_file.write(line) if not line.endswith('\n'): to_file.write("\n\\ No newline at end of file\n") to_file.write('\n') def _spawn_external_diff(diffcmd, capture_errors=True): """Spawn the external diff process, and return the child handle. :param diffcmd: The command list to spawn :param capture_errors: Capture stderr as well as setting LANG=C and LC_ALL=C. This lets us read and understand the output of diff, and respond to any errors. :return: A Popen object. 
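For illustration (hypothetical paths; the real command list is built by
external_diff below):

    pipe = _spawn_external_diff(['diff', '-u', '--label', 'a/foo',
                                 '/tmp/old', '--label', 'b/foo', '/tmp/new'])
    out, err = pipe.communicate()
    # GNU diff convention: returncode 0 = no differences, 1 = differences,
    # 2 = trouble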
""" if capture_errors: # construct minimal environment env = {} path = os.environ.get('PATH') if path is not None: env['PATH'] = path env['LANGUAGE'] = 'C' # on win32 only LANGUAGE has effect env['LANG'] = 'C' env['LC_ALL'] = 'C' stderr = subprocess.PIPE else: env = None stderr = None try: pipe = subprocess.Popen(diffcmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=stderr, env=env) except OSError, e: if e.errno == errno.ENOENT: raise errors.NoDiff(str(e)) raise return pipe # diff style options as of GNU diff v3.2 style_option_list = ['-c', '-C', '--context', '-e', '--ed', '-f', '--forward-ed', '-q', '--brief', '--normal', '-n', '--rcs', '-u', '-U', '--unified', '-y', '--side-by-side', '-D', '--ifdef'] def default_style_unified(diff_opts): """Default to unified diff style if alternative not specified in diff_opts. diff only allows one style to be specified; they don't override. Note that some of these take optargs, and the optargs can be directly appended to the options. This is only an approximate parser; it doesn't properly understand the grammar. :param diff_opts: List of options for external (GNU) diff. :return: List of options with default style=='unified'. """ for s in style_option_list: for j in diff_opts: if j.startswith(s): break else: continue break else: diff_opts.append('-u') return diff_opts def external_diff(old_filename, oldlines, new_filename, newlines, to_file, diff_opts): """Display a diff by calling out to the external diff program.""" # make sure our own output is properly ordered before the diff to_file.flush() oldtmp_fd, old_abspath = tempfile.mkstemp(prefix='bzr-diff-old-') newtmp_fd, new_abspath = tempfile.mkstemp(prefix='bzr-diff-new-') oldtmpf = os.fdopen(oldtmp_fd, 'wb') newtmpf = os.fdopen(newtmp_fd, 'wb') try: # TODO: perhaps a special case for comparing to or from the empty # sequence; can just use /dev/null on Unix # TODO: if either of the files being compared already exists as a # regular named file (e.g. in the working directory) then we can # compare directly to that, rather than copying it. oldtmpf.writelines(oldlines) newtmpf.writelines(newlines) oldtmpf.close() newtmpf.close() if not diff_opts: diff_opts = [] if sys.platform == 'win32': # Popen doesn't do the proper encoding for external commands # Since we are dealing with an ANSI api, use mbcs encoding old_filename = old_filename.encode('mbcs') new_filename = new_filename.encode('mbcs') diffcmd = ['diff', '--label', old_filename, old_abspath, '--label', new_filename, new_abspath, '--binary', ] diff_opts = default_style_unified(diff_opts) if diff_opts: diffcmd.extend(diff_opts) pipe = _spawn_external_diff(diffcmd, capture_errors=True) out,err = pipe.communicate() rc = pipe.returncode # internal_diff() adds a trailing newline, add one here for consistency out += '\n' if rc == 2: # 'diff' gives retcode == 2 for all sorts of errors # one of those is 'Binary files differ'. # Bad options could also be the problem. # 'Binary files' is not a real error, so we suppress that error. lang_c_out = out # Since we got here, we want to make sure to give an i18n error pipe = _spawn_external_diff(diffcmd, capture_errors=False) out, err = pipe.communicate() # Write out the new i18n diff response to_file.write(out+'\n') if pipe.returncode != 2: raise errors.BzrError( 'external diff failed with exit code 2' ' when run with LANG=C and LC_ALL=C,' ' but not when run natively: %r' % (diffcmd,)) first_line = lang_c_out.split('\n', 1)[0] # Starting with diffutils 2.8.4 the word "binary" was dropped. 
m = re.match('^(binary )?files.*differ$', first_line, re.I) if m is None: raise errors.BzrError('external diff failed with exit code 2;' ' command: %r' % (diffcmd,)) else: # Binary files differ, just return return # If we got to here, we haven't written out the output of diff # do so now to_file.write(out) if rc not in (0, 1): # returns 1 if files differ; that's OK if rc < 0: msg = 'signal %d' % (-rc) else: msg = 'exit code %d' % rc raise errors.BzrError('external diff failed with %s; command: %r' % (msg, diffcmd)) finally: oldtmpf.close() # and delete newtmpf.close() def cleanup(path): # Warn in case the file couldn't be deleted (in case windows still # holds the file open, but not if the files have already been # deleted) try: os.remove(path) except OSError, e: if e.errno not in (errno.ENOENT,): warning('Failed to delete temporary file: %s %s', path, e) cleanup(old_abspath) cleanup(new_abspath) def get_trees_and_branches_to_diff_locked( path_list, revision_specs, old_url, new_url, add_cleanup, apply_view=True): """Get the trees and specific files to diff given a list of paths. This method works out the trees to be diff'ed and the files of interest within those trees. :param path_list: the list of arguments passed to the diff command :param revision_specs: Zero, one or two RevisionSpecs from the diff command line, saying what revisions to compare. :param old_url: The url of the old branch or tree. If None, the tree to use is taken from the first path, if any, or the current working tree. :param new_url: The url of the new branch or tree. If None, the tree to use is taken from the first path, if any, or the current working tree. :param add_cleanup: a callable like Command.add_cleanup. get_trees_and_branches_to_diff will register cleanups that must be run to unlock the trees, etc. :param apply_view: if True and a view is set, apply the view or check that the paths are within it :returns: a tuple of (old_tree, new_tree, old_branch, new_branch, specific_files, extra_trees) where extra_trees is a sequence of additional trees to search in for file-ids. The trees and branches will be read-locked until the cleanups registered via the add_cleanup param are run. """ # Get the old and new revision specs old_revision_spec = None new_revision_spec = None if revision_specs is not None: if len(revision_specs) > 0: old_revision_spec = revision_specs[0] if old_url is None: old_url = old_revision_spec.get_branch() if len(revision_specs) > 1: new_revision_spec = revision_specs[1] if new_url is None: new_url = new_revision_spec.get_branch() other_paths = [] make_paths_wt_relative = True consider_relpath = True if path_list is None or len(path_list) == 0: # If no path is given, the current working tree is used default_location = u'.' 
consider_relpath = False elif old_url is not None and new_url is not None: other_paths = path_list make_paths_wt_relative = False else: default_location = path_list[0] other_paths = path_list[1:] def lock_tree_or_branch(wt, br): if wt is not None: wt.lock_read() add_cleanup(wt.unlock) elif br is not None: br.lock_read() add_cleanup(br.unlock) # Get the old location specific_files = [] if old_url is None: old_url = default_location working_tree, branch, relpath = \ controldir.ControlDir.open_containing_tree_or_branch(old_url) lock_tree_or_branch(working_tree, branch) if consider_relpath and relpath != '': if working_tree is not None and apply_view: views.check_path_in_view(working_tree, relpath) specific_files.append(relpath) old_tree = _get_tree_to_diff(old_revision_spec, working_tree, branch) old_branch = branch # Get the new location if new_url is None: new_url = default_location if new_url != old_url: working_tree, branch, relpath = \ controldir.ControlDir.open_containing_tree_or_branch(new_url) lock_tree_or_branch(working_tree, branch) if consider_relpath and relpath != '': if working_tree is not None and apply_view: views.check_path_in_view(working_tree, relpath) specific_files.append(relpath) new_tree = _get_tree_to_diff(new_revision_spec, working_tree, branch, basis_is_default=working_tree is None) new_branch = branch # Get the specific files (all files is None, no files is []) if make_paths_wt_relative and working_tree is not None: other_paths = working_tree.safe_relpath_files( other_paths, apply_view=apply_view) specific_files.extend(other_paths) if len(specific_files) == 0: specific_files = None if (working_tree is not None and working_tree.supports_views() and apply_view): view_files = working_tree.views.lookup_view() if view_files: specific_files = view_files view_str = views.view_display_str(view_files) note(gettext("*** Ignoring files outside view. View is %s") % view_str) # Get extra trees that ought to be searched for file-ids extra_trees = None if working_tree is not None and working_tree not in (old_tree, new_tree): extra_trees = (working_tree,) return (old_tree, new_tree, old_branch, new_branch, specific_files, extra_trees) def _get_tree_to_diff(spec, tree=None, branch=None, basis_is_default=True): if branch is None and tree is not None: branch = tree.branch if spec is None or spec.spec is None: if basis_is_default: if tree is not None: return tree.basis_tree() else: return branch.basis_tree() else: return tree return spec.as_tree(branch) def show_diff_trees(old_tree, new_tree, to_file, specific_files=None, external_diff_options=None, old_label='a/', new_label='b/', extra_trees=None, path_encoding='utf8', using=None, format_cls=None, context=DEFAULT_CONTEXT_AMOUNT): """Show in text form the changes from one tree to another. :param to_file: The output stream. :param specific_files: Include only changes to these files - None for all changes. :param external_diff_options: If set, use an external GNU diff and pass these options. 
:param extra_trees: If set, more Trees to use for looking up file ids :param path_encoding: If set, the path will be encoded as specified, otherwise is supposed to be utf8 :param format_cls: Formatter class (DiffTree subclass) """ if context is None: context = DEFAULT_CONTEXT_AMOUNT if format_cls is None: format_cls = DiffTree old_tree.lock_read() try: if extra_trees is not None: for tree in extra_trees: tree.lock_read() new_tree.lock_read() try: differ = format_cls.from_trees_options(old_tree, new_tree, to_file, path_encoding, external_diff_options, old_label, new_label, using, context_lines=context) return differ.show_diff(specific_files, extra_trees) finally: new_tree.unlock() if extra_trees is not None: for tree in extra_trees: tree.unlock() finally: old_tree.unlock() def _patch_header_date(tree, file_id, path): """Returns a timestamp suitable for use in a patch header.""" try: mtime = tree.get_file_mtime(file_id, path) except errors.FileTimestampUnavailable: mtime = 0 return timestamp.format_patch_date(mtime) def get_executable_change(old_is_x, new_is_x): descr = { True:"+x", False:"-x", None:"??" } if old_is_x != new_is_x: return ["%s to %s" % (descr[old_is_x], descr[new_is_x],)] else: return [] class DiffPath(object): """Base type for command object that compare files""" # The type or contents of the file were unsuitable for diffing CANNOT_DIFF = 'CANNOT_DIFF' # The file has changed in a semantic way CHANGED = 'CHANGED' # The file content may have changed, but there is no semantic change UNCHANGED = 'UNCHANGED' def __init__(self, old_tree, new_tree, to_file, path_encoding='utf-8'): """Constructor. :param old_tree: The tree to show as the old tree in the comparison :param new_tree: The tree to show as new in the comparison :param to_file: The file to write comparison data to :param path_encoding: The character encoding to write paths in """ self.old_tree = old_tree self.new_tree = new_tree self.to_file = to_file self.path_encoding = path_encoding def finish(self): pass @classmethod def from_diff_tree(klass, diff_tree): return klass(diff_tree.old_tree, diff_tree.new_tree, diff_tree.to_file, diff_tree.path_encoding) @staticmethod def _diff_many(differs, file_id, old_path, new_path, old_kind, new_kind): for file_differ in differs: result = file_differ.diff(file_id, old_path, new_path, old_kind, new_kind) if result is not DiffPath.CANNOT_DIFF: return result else: return DiffPath.CANNOT_DIFF class DiffKindChange(object): """Special differ for file kind changes. Represents kind change as deletion + creation. Uses the other differs to do this. """ def __init__(self, differs): self.differs = differs def finish(self): pass @classmethod def from_diff_tree(klass, diff_tree): return klass(diff_tree.differs) def diff(self, file_id, old_path, new_path, old_kind, new_kind): """Perform comparison :param file_id: The file_id of the file to compare :param old_path: Path of the file in the old tree :param new_path: Path of the file in the new tree :param old_kind: Old file-kind of the file :param new_kind: New file-kind of the file """ if None in (old_kind, new_kind): return DiffPath.CANNOT_DIFF result = DiffPath._diff_many(self.differs, file_id, old_path, new_path, old_kind, None) if result is DiffPath.CANNOT_DIFF: return result return DiffPath._diff_many(self.differs, file_id, old_path, new_path, None, new_kind) class DiffDirectory(DiffPath): def diff(self, file_id, old_path, new_path, old_kind, new_kind): """Perform comparison between two directories. 
(dummy) """ if 'directory' not in (old_kind, new_kind): return self.CANNOT_DIFF if old_kind not in ('directory', None): return self.CANNOT_DIFF if new_kind not in ('directory', None): return self.CANNOT_DIFF return self.CHANGED class DiffSymlink(DiffPath): def diff(self, file_id, old_path, new_path, old_kind, new_kind): """Perform comparison between two symlinks :param file_id: The file_id of the file to compare :param old_path: Path of the file in the old tree :param new_path: Path of the file in the new tree :param old_kind: Old file-kind of the file :param new_kind: New file-kind of the file """ if 'symlink' not in (old_kind, new_kind): return self.CANNOT_DIFF if old_kind == 'symlink': old_target = self.old_tree.get_symlink_target(file_id) elif old_kind is None: old_target = None else: return self.CANNOT_DIFF if new_kind == 'symlink': new_target = self.new_tree.get_symlink_target(file_id) elif new_kind is None: new_target = None else: return self.CANNOT_DIFF return self.diff_symlink(old_target, new_target) def diff_symlink(self, old_target, new_target): if old_target is None: self.to_file.write('=== target is %r\n' % new_target) elif new_target is None: self.to_file.write('=== target was %r\n' % old_target) else: self.to_file.write('=== target changed %r => %r\n' % (old_target, new_target)) return self.CHANGED class DiffText(DiffPath): # GNU Patch uses the epoch date to detect files that are being added # or removed in a diff. EPOCH_DATE = '1970-01-01 00:00:00 +0000' def __init__(self, old_tree, new_tree, to_file, path_encoding='utf-8', old_label='', new_label='', text_differ=internal_diff, context_lines=DEFAULT_CONTEXT_AMOUNT): DiffPath.__init__(self, old_tree, new_tree, to_file, path_encoding) self.text_differ = text_differ self.old_label = old_label self.new_label = new_label self.path_encoding = path_encoding self.context_lines = context_lines def diff(self, file_id, old_path, new_path, old_kind, new_kind): """Compare two files in unified diff format :param file_id: The file_id of the file to compare :param old_path: Path of the file in the old tree :param new_path: Path of the file in the new tree :param old_kind: Old file-kind of the file :param new_kind: New file-kind of the file """ if 'file' not in (old_kind, new_kind): return self.CANNOT_DIFF from_file_id = to_file_id = file_id if old_kind == 'file': old_date = _patch_header_date(self.old_tree, file_id, old_path) elif old_kind is None: old_date = self.EPOCH_DATE from_file_id = None else: return self.CANNOT_DIFF if new_kind == 'file': new_date = _patch_header_date(self.new_tree, file_id, new_path) elif new_kind is None: new_date = self.EPOCH_DATE to_file_id = None else: return self.CANNOT_DIFF from_label = '%s%s\t%s' % (self.old_label, old_path, old_date) to_label = '%s%s\t%s' % (self.new_label, new_path, new_date) return self.diff_text(from_file_id, to_file_id, from_label, to_label, old_path, new_path) def diff_text(self, from_file_id, to_file_id, from_label, to_label, from_path=None, to_path=None): """Diff the content of given files in two trees :param from_file_id: The id of the file in the from tree. If None, the file is not present in the from tree. :param to_file_id: The id of the file in the to tree. This may refer to a different file from from_file_id. If None, the file is not present in the to tree. :param from_path: The path in the from tree or None if unknown. :param to_path: The path in the to tree or None if unknown. 
""" def _get_text(tree, file_id, path): if file_id is not None: return tree.get_file_lines(file_id, path) else: return [] try: from_text = _get_text(self.old_tree, from_file_id, from_path) to_text = _get_text(self.new_tree, to_file_id, to_path) self.text_differ(from_label, from_text, to_label, to_text, self.to_file, path_encoding=self.path_encoding, context_lines=self.context_lines) except errors.BinaryFile: self.to_file.write( ("Binary files %s and %s differ\n" % (from_label, to_label)).encode(self.path_encoding,'replace')) return self.CHANGED class DiffFromTool(DiffPath): def __init__(self, command_template, old_tree, new_tree, to_file, path_encoding='utf-8'): DiffPath.__init__(self, old_tree, new_tree, to_file, path_encoding) self.command_template = command_template self._root = osutils.mkdtemp(prefix='bzr-diff-') @classmethod def from_string(klass, command_string, old_tree, new_tree, to_file, path_encoding='utf-8'): command_template = cmdline.split(command_string) if '@' not in command_string: command_template.extend(['@old_path', '@new_path']) return klass(command_template, old_tree, new_tree, to_file, path_encoding) @classmethod def make_from_diff_tree(klass, command_string, external_diff_options=None): def from_diff_tree(diff_tree): full_command_string = [command_string] if external_diff_options is not None: full_command_string += ' ' + external_diff_options return klass.from_string(full_command_string, diff_tree.old_tree, diff_tree.new_tree, diff_tree.to_file) return from_diff_tree def _get_command(self, old_path, new_path): my_map = {'old_path': old_path, 'new_path': new_path} command = [AtTemplate(t).substitute(my_map) for t in self.command_template] if sys.platform == 'win32': # Popen doesn't accept unicode on win32 command_encoded = [] for c in command: if isinstance(c, unicode): command_encoded.append(c.encode('mbcs')) else: command_encoded.append(c) return command_encoded else: return command def _execute(self, old_path, new_path): command = self._get_command(old_path, new_path) try: proc = subprocess.Popen(command, stdout=subprocess.PIPE, cwd=self._root) except OSError, e: if e.errno == errno.ENOENT: raise errors.ExecutableMissing(command[0]) else: raise self.to_file.write(proc.stdout.read()) return proc.wait() def _try_symlink_root(self, tree, prefix): if (getattr(tree, 'abspath', None) is None or not osutils.host_os_dereferences_symlinks()): return False try: os.symlink(tree.abspath(''), osutils.pathjoin(self._root, prefix)) except OSError, e: if e.errno != errno.EEXIST: raise return True @staticmethod def _fenc(): """Returns safe encoding for passing file path to diff tool""" if sys.platform == 'win32': return 'mbcs' else: # Don't fallback to 'utf-8' because subprocess may not be able to # handle utf-8 correctly when locale is not utf-8. return sys.getfilesystemencoding() or 'ascii' def _is_safepath(self, path): """Return true if `path` may be able to pass to subprocess.""" fenc = self._fenc() try: return path == path.encode(fenc).decode(fenc) except UnicodeError: return False def _safe_filename(self, prefix, relpath): """Replace unsafe character in `relpath` then join `self._root`, `prefix` and `relpath`.""" fenc = self._fenc() # encoded_str.replace('?', '_') may break multibyte char. 
# So we should encode, decode, then replace(u'?', u'_') relpath_tmp = relpath.encode(fenc, 'replace').decode(fenc, 'replace') relpath_tmp = relpath_tmp.replace(u'?', u'_') return osutils.pathjoin(self._root, prefix, relpath_tmp) def _write_file(self, file_id, tree, prefix, relpath, force_temp=False, allow_write=False): if not force_temp and isinstance(tree, WorkingTree): full_path = tree.abspath(tree.id2path(file_id)) if self._is_safepath(full_path): return full_path full_path = self._safe_filename(prefix, relpath) if not force_temp and self._try_symlink_root(tree, prefix): return full_path parent_dir = osutils.dirname(full_path) try: os.makedirs(parent_dir) except OSError, e: if e.errno != errno.EEXIST: raise source = tree.get_file(file_id, relpath) try: target = open(full_path, 'wb') try: osutils.pumpfile(source, target) finally: target.close() finally: source.close() try: mtime = tree.get_file_mtime(file_id) except errors.FileTimestampUnavailable: pass else: os.utime(full_path, (mtime, mtime)) if not allow_write: osutils.make_readonly(full_path) return full_path def _prepare_files(self, file_id, old_path, new_path, force_temp=False, allow_write_new=False): old_disk_path = self._write_file(file_id, self.old_tree, 'old', old_path, force_temp) new_disk_path = self._write_file(file_id, self.new_tree, 'new', new_path, force_temp, allow_write=allow_write_new) return old_disk_path, new_disk_path def finish(self): try: osutils.rmtree(self._root) except OSError, e: if e.errno != errno.ENOENT: mutter("The temporary directory \"%s\" was not " "cleanly removed: %s." % (self._root, e)) def diff(self, file_id, old_path, new_path, old_kind, new_kind): if (old_kind, new_kind) != ('file', 'file'): return DiffPath.CANNOT_DIFF (old_disk_path, new_disk_path) = self._prepare_files( file_id, old_path, new_path) self._execute(old_disk_path, new_disk_path) def edit_file(self, file_id): """Use this tool to edit a file. A temporary copy will be edited, and the new contents will be returned. :param file_id: The id of the file to edit. :return: The new contents of the file. """ old_path = self.old_tree.id2path(file_id) new_path = self.new_tree.id2path(file_id) old_abs_path, new_abs_path = self._prepare_files( file_id, old_path, new_path, allow_write_new=True, force_temp=True) command = self._get_command(old_abs_path, new_abs_path) subprocess.call(command, cwd=self._root) new_file = open(new_abs_path, 'rb') try: return new_file.read() finally: new_file.close() class DiffTree(object): """Provides textual representations of the difference between two trees. A DiffTree examines two trees and where a file-id has altered between them, generates a textual representation of the difference. DiffTree uses a sequence of DiffPath objects which are each given the opportunity to handle a given altered fileid. The list of DiffPath objects can be extended globally by appending to DiffTree.diff_factories, or for a specific diff operation by supplying the extra_factories option to the appropriate method. """ # list of factories that can provide instances of DiffPath objects # may be extended by plugins. 
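# Each factory here takes the DiffTree instance and returns a DiffPath.
# These run after any extra_factories passed to __init__, and before the
# DiffText / DiffKindChange differs that __init__ always appends last.
# As the class docstring notes, plugins may extend this list, e.g. (with a
# hypothetical MyDiffPath plugin class):
#   DiffTree.diff_factories.append(MyDiffPath.from_diff_tree)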
diff_factories = [DiffSymlink.from_diff_tree, DiffDirectory.from_diff_tree] def __init__(self, old_tree, new_tree, to_file, path_encoding='utf-8', diff_text=None, extra_factories=None): """Constructor :param old_tree: Tree to show as old in the comparison :param new_tree: Tree to show as new in the comparison :param to_file: File to write comparision to :param path_encoding: Character encoding to write paths in :param diff_text: DiffPath-type object to use as a last resort for diffing text files. :param extra_factories: Factories of DiffPaths to try before any other DiffPaths""" if diff_text is None: diff_text = DiffText(old_tree, new_tree, to_file, path_encoding, '', '', internal_diff) self.old_tree = old_tree self.new_tree = new_tree self.to_file = to_file self.path_encoding = path_encoding self.differs = [] if extra_factories is not None: self.differs.extend(f(self) for f in extra_factories) self.differs.extend(f(self) for f in self.diff_factories) self.differs.extend([diff_text, DiffKindChange.from_diff_tree(self)]) @classmethod def from_trees_options(klass, old_tree, new_tree, to_file, path_encoding, external_diff_options, old_label, new_label, using, context_lines): """Factory for producing a DiffTree. Designed to accept options used by show_diff_trees. :param old_tree: The tree to show as old in the comparison :param new_tree: The tree to show as new in the comparison :param to_file: File to write comparisons to :param path_encoding: Character encoding to use for writing paths :param external_diff_options: If supplied, use the installed diff binary to perform file comparison, using supplied options. :param old_label: Prefix to use for old file labels :param new_label: Prefix to use for new file labels :param using: Commandline to use to invoke an external diff tool """ if using is not None: extra_factories = [DiffFromTool.make_from_diff_tree(using, external_diff_options)] else: extra_factories = [] if external_diff_options: opts = external_diff_options.split() def diff_file(olab, olines, nlab, nlines, to_file, path_encoding=None, context_lines=None): """:param path_encoding: not used but required to match the signature of internal_diff. """ external_diff(olab, olines, nlab, nlines, to_file, opts) else: diff_file = internal_diff diff_text = DiffText(old_tree, new_tree, to_file, path_encoding, old_label, new_label, diff_file, context_lines=context_lines) return klass(old_tree, new_tree, to_file, path_encoding, diff_text, extra_factories) def show_diff(self, specific_files, extra_trees=None): """Write tree diff to self.to_file :param specific_files: the specific files to compare (recursive) :param extra_trees: extra trees to use for mapping paths to file_ids """ try: return self._show_diff(specific_files, extra_trees) finally: for differ in self.differs: differ.finish() def _show_diff(self, specific_files, extra_trees): # TODO: Generation of pseudo-diffs for added/deleted files could # be usefully made into a much faster special case. 
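# iter_changes yields one tuple per changed path:
#   (file_id, (old_path, new_path), changed_content, versioned,
#    parent, name, kind, executable)
# where versioned, parent, name, kind and executable are (old, new) pairs.
# The loop below classifies each change as removed/added/renamed/modified
# from those pairs, writes an '===' header line, and delegates the actual
# content diff to self._diff().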
iterator = self.new_tree.iter_changes(self.old_tree, specific_files=specific_files, extra_trees=extra_trees, require_versioned=True) has_changes = 0 def changes_key(change): old_path, new_path = change[1] path = new_path if path is None: path = old_path return path def get_encoded_path(path): if path is not None: return path.encode(self.path_encoding, "replace") for (file_id, paths, changed_content, versioned, parent, name, kind, executable) in sorted(iterator, key=changes_key): # The root does not get diffed, and items with no known kind (that # is, missing) in both trees are skipped as well. if parent == (None, None) or kind == (None, None): continue oldpath, newpath = paths oldpath_encoded = get_encoded_path(paths[0]) newpath_encoded = get_encoded_path(paths[1]) old_present = (kind[0] is not None and versioned[0]) new_present = (kind[1] is not None and versioned[1]) renamed = (parent[0], name[0]) != (parent[1], name[1]) properties_changed = [] properties_changed.extend(get_executable_change(executable[0], executable[1])) if properties_changed: prop_str = " (properties changed: %s)" % (", ".join(properties_changed),) else: prop_str = "" if (old_present, new_present) == (True, False): self.to_file.write("=== removed %s '%s'\n" % (kind[0], oldpath_encoded)) newpath = oldpath elif (old_present, new_present) == (False, True): self.to_file.write("=== added %s '%s'\n" % (kind[1], newpath_encoded)) oldpath = newpath elif renamed: self.to_file.write("=== renamed %s '%s' => '%s'%s\n" % (kind[0], oldpath_encoded, newpath_encoded, prop_str)) else: # if it was produced by iter_changes, it must be # modified *somehow*, either content or execute bit. self.to_file.write("=== modified %s '%s'%s\n" % (kind[0], newpath_encoded, prop_str)) if changed_content: self._diff(file_id, oldpath, newpath, kind[0], kind[1]) has_changes = 1 if renamed: has_changes = 1 return has_changes def diff(self, file_id, old_path, new_path): """Perform a diff of a single file :param file_id: file-id of the file :param old_path: The path of the file in the old tree :param new_path: The path of the file in the new tree """ try: old_kind = self.old_tree.kind(file_id) except (errors.NoSuchId, errors.NoSuchFile): old_kind = None try: new_kind = self.new_tree.kind(file_id) except (errors.NoSuchId, errors.NoSuchFile): new_kind = None self._diff(file_id, old_path, new_path, old_kind, new_kind) def _diff(self, file_id, old_path, new_path, old_kind, new_kind): result = DiffPath._diff_many(self.differs, file_id, old_path, new_path, old_kind, new_kind) if result is DiffPath.CANNOT_DIFF: error_path = new_path if error_path is None: error_path = old_path raise errors.NoDiffFound(error_path) format_registry = Registry() format_registry.register('default', DiffTree) bzr-2.7.0/bzrlib/directory_service.py0000644000000000000000000001210411732353001016016 0ustar 00000000000000# Copyright (C) 2008, 2009, 2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Directory service registration and usage. Directory services are utilities that provide a mapping from URL-like strings to true URLs. Examples include lp:urls and per-user location aliases. """ from __future__ import absolute_import from bzrlib import ( errors, registry, ) from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ from bzrlib import ( branch as _mod_branch, controldir as _mod_controldir, urlutils, ) """) class DirectoryServiceRegistry(registry.Registry): """This object maintains and uses a list of directory services. Directory services may be registered via the standard Registry methods. They will be invoked if their key is a prefix of the supplied URL. Each item registered should be a factory of objects that provide a look_up method, as invoked by dereference. Specifically, look_up should accept a name and URL, and return a URL. """ def dereference(self, url): """Dereference a supplied URL if possible. URLs that match a registered directory service prefix are looked up in it. Non-matching urls are returned verbatim. This is applied only once; the resulting URL must not be one that requires further dereferencing. :param url: The URL to dereference :return: The dereferenced URL if applicable, the input URL otherwise. """ match = self.get_prefix(url) if match is None: return url service, name = match return service().look_up(name, url) directories = DirectoryServiceRegistry() class AliasDirectory(object): """Directory lookup for locations associated with a branch. :parent, :submit, :public, :push, :this, and :bound are currently supported. On error, a subclass of DirectoryLookupFailure will be raised. """ branch_aliases = registry.Registry() branch_aliases.register('parent', lambda b: b.get_parent(), help="The parent of this branch.") branch_aliases.register('submit', lambda b: b.get_submit_branch(), help="The submit branch for this branch.") branch_aliases.register('public', lambda b: b.get_public_branch(), help="The public location of this branch.") branch_aliases.register('bound', lambda b: b.get_bound_location(), help="The branch this branch is bound to, for bound branches.") branch_aliases.register('push', lambda b: b.get_push_location(), help="The saved location used for `bzr push` with no arguments.") branch_aliases.register('this', lambda b: b.base, help="This branch.") def look_up(self, name, url): branch = _mod_branch.Branch.open_containing('.')[0] parts = url.split('/', 1) if len(parts) == 2: name, extra = parts else: (name,) = parts extra = None try: method = self.branch_aliases.get(name[1:]) except KeyError: raise errors.InvalidLocationAlias(url) else: result = method(branch) if result is None: raise errors.UnsetLocationAlias(url) if extra is not None: result = urlutils.join(result, extra) return result @classmethod def help_text(cls, topic): alias_lines = [] for key in cls.branch_aliases.keys(): help = cls.branch_aliases.get_help(key) alias_lines.append(" :%-10s%s\n" % (key, help)) return """\ Location aliases ================ Bazaar defines several aliases for locations associated with a branch. These can be used with most commands that expect a location, such as `bzr push`. 
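Aliases are resolved against the branch containing the current directory; an alias that has no value set for that branch cannot be expanded and Bazaar will report an error.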
The aliases are:: %s For example, to push to the parent location:: bzr push :parent """ % "".join(alias_lines) directories.register(':', AliasDirectory, 'Easy access to remembered branch locations') class ColocatedDirectory(object): """Directory lookup for colocated branches. co:somename will resolve to the colocated branch with "somename" in the current directory. """ def look_up(self, name, url): dir = _mod_controldir.ControlDir.open_containing('.')[0] return urlutils.join_segment_parameters(dir.user_url, {"branch": urlutils.escape(name)}) directories.register('co:', ColocatedDirectory, 'Easy access to colocated branches') bzr-2.7.0/bzrlib/dirstate.py0000644000000000000000000062472612162032600014131 0ustar 00000000000000# Copyright (C) 2006-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """DirState objects record the state of a directory and its bzr metadata. Pseudo EBNF grammar for the state file. Fields are separated by NULLs, and lines by NL. The field delimiters are ommitted in the grammar, line delimiters are not - this is done for clarity of reading. All string data is in utf8. :: MINIKIND = "f" | "d" | "l" | "a" | "r" | "t"; NL = "\\n"; NULL = "\\0"; WHOLE_NUMBER = {digit}, digit; BOOLEAN = "y" | "n"; REVISION_ID = a non-empty utf8 string; dirstate format = header line, full checksum, row count, parent details, ghost_details, entries; header line = "#bazaar dirstate flat format 3", NL; full checksum = "crc32: ", ["-"], WHOLE_NUMBER, NL; row count = "num_entries: ", WHOLE_NUMBER, NL; parent_details = WHOLE NUMBER, {REVISION_ID}* NL; ghost_details = WHOLE NUMBER, {REVISION_ID}*, NL; entries = {entry}; entry = entry_key, current_entry_details, {parent_entry_details}; entry_key = dirname, basename, fileid; current_entry_details = common_entry_details, working_entry_details; parent_entry_details = common_entry_details, history_entry_details; common_entry_details = MINIKIND, fingerprint, size, executable working_entry_details = packed_stat history_entry_details = REVISION_ID; executable = BOOLEAN; size = WHOLE_NUMBER; fingerprint = a nonempty utf8 sequence with meaning defined by minikind. Given this definition, the following is useful to know:: entry (aka row) - all the data for a given key. entry[0]: The key (dirname, basename, fileid) entry[0][0]: dirname entry[0][1]: basename entry[0][2]: fileid entry[1]: The tree(s) data for this path and id combination. 
entry[1][0]: The current tree entry[1][1]: The second tree For an entry for a tree, we have (using tree 0 - current tree) to demonstrate:: entry[1][0][0]: minikind entry[1][0][1]: fingerprint entry[1][0][2]: size entry[1][0][3]: executable entry[1][0][4]: packed_stat OR (for non tree-0):: entry[1][1][4]: revision_id There may be multiple rows at the root, one per id present in the root, so the in memory root row is now:: self._dirblocks[0] -> ('', [entry ...]), and the entries in there are:: entries[0][0]: '' entries[0][1]: '' entries[0][2]: file_id entries[1][0]: The tree data for the current tree for this fileid at / etc. Kinds:: 'r' is a relocated entry: This path is not present in this tree with this id, but the id can be found at another location. The fingerprint is used to point to the target location. 'a' is an absent entry: In that tree the id is not present at this path. 'd' is a directory entry: This path in this tree is a directory with the current file id. There is no fingerprint for directories. 'f' is a file entry: As for directory, but it's a file. The fingerprint is the sha1 value of the file's canonical form, i.e. after any read filters have been applied to the convenience form stored in the working tree. 'l' is a symlink entry: As for directory, but a symlink. The fingerprint is the link target. 't' is a reference to a nested subtree; the fingerprint is the referenced revision. Ordering: The entries on disk and in memory are ordered according to the following keys:: directory, as a list of components filename file-id --- Format 1 had the following different definition: --- :: rows = dirname, NULL, basename, NULL, MINIKIND, NULL, fileid_utf8, NULL, WHOLE NUMBER (* size *), NULL, packed stat, NULL, sha1|symlink target, {PARENT ROW} PARENT ROW = NULL, revision_utf8, NULL, MINIKIND, NULL, dirname, NULL, basename, NULL, WHOLE NUMBER (* size *), NULL, "y" | "n", NULL, SHA1 PARENT ROW's are emitted for every parent that is not in the ghosts details line. That is, if the parents are foo, bar, baz, and the ghosts are bar, then each row will have a PARENT ROW for foo and baz, but not for bar. In any tree, a kind of 'moved' indicates that the fingerprint field (which we treat as opaque data specific to the 'kind' anyway) has the details for the id of this row in that tree. I'm strongly tempted to add a id->path index as well, but I think that where we need id->path mapping; we also usually read the whole file, so I'm going to skip that for the moment, as we have the ability to locate via bisect any path in any tree, and if we lookup things by path, we can accumulate an id->path mapping as we go, which will tend to match what we looked for. I plan to implement this asap, so please speak up now to alter/tweak the design - and once we stabilise on this, I'll update the wiki page for it. The rationale for all this is that we want fast operations for the common case (diff/status/commit/merge on all files) and extremely fast operations for the less common but still occurs a lot status/diff/commit on specific files). Operations on specific files involve a scan for all the children of a path, *in every involved tree*, which the current format did not accommodate. ---- Design priorities: 1. Fast end to end use for bzr's top 5 uses cases. (commmit/diff/status/merge/???) 2. fall back current object model as needed. 3. scale usably to the largest trees known today - say 50K entries. 
(mozilla is an example of this) Locking: Eventually reuse dirstate objects across locks IFF the dirstate file has not been modified, but will require that we flush/ignore cached stat-hit data because we won't want to restat all files on disk just because a lock was acquired, yet we cannot trust the data after the previous lock was released. Memory representation:: vector of all directories, and vector of the children? i.e. root_entry = (direntry for root, [parent_direntries_for_root]), dirblocks = [ ('', ['data for achild', 'data for bchild', 'data for cchild']) ('dir', ['achild', 'cchild', 'echild']) ] - single bisect to find N subtrees from a path spec - in-order for serialisation - this is 'dirblock' grouping. - insertion of a file '/a' affects only the '/' child-vector; that is, inserting 10K elements from scratch does not generate O(N^2) memmoves of a single vector, but rather memmoves within each individual child-vector, which tend to be limited to a manageable number. Will scale badly on trees with 10K entries in a single directory. Compare with Inventory.InventoryDirectory, which has a dictionary for the children: no bisect capability, it can only probe for exact matches, or grab all elements and sort. - What's the risk of error here? Once we have the base format being processed we should have a net win regardless of optimality. So we are going to go with what seems reasonable. open questions: Maybe we should do a test profile of the core structure - 10K simulated searches/lookups/etc? Objects for each row? The lifetime of DirState objects is currently per lock, but see above for possible extensions. The lifetime of a row from a dirstate is expected to be very short in the optimistic case, which is what we are optimising for. For instance, subtree status will determine from analysis of the disk data what rows need to be examined at all, and will be able to determine from a single row whether that file has altered or not, so we are aiming to process tens of thousands of entries each second within the dirstate context, before exposing anything to the larger codebase. This suggests we want the time for a single file comparison to be < 0.1 milliseconds. That would give us 10000 paths per second processed, and to scale to 100 thousand we'll need another order of magnitude of improvement. Now, as the lifetime for all unchanged entries is the time to parse, stat the file on disk, and then immediately discard, the overhead of object creation becomes a significant cost. Figures: Creating a tuple from 3 elements was profiled at 0.0625 microseconds, whereas creating an object which is subclassed from tuple was 0.500 microseconds, and creating an object with 3 elements and slots was 3 microseconds long. 0.1 milliseconds is 100 microseconds, and ideally we'll get down to 10 microseconds for the total processing - having 33% of that be object creation is a huge overhead. There is a potential cost in using tuples within each row, which is that the conditional code to do comparisons may be slower than method invocation, but method invocation is known to be slow due to stack frame creation, so avoiding methods in these tight inner loops is unfortunately desirable. We can consider a pyrex version of this with objects in future if desired.
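For example (all values illustrative), the in-memory entry for a versioned file foo/bar in a tree with one parent has the shape::

    (('foo', 'bar', 'bar-file-id'),
     [('f', sha1_of_working_content, size, False, packed_stat),      # this tree
      ('f', sha1_in_parent, size_in_parent, False, 'parent-revid')]) # parent tree

i.e. the key tuple followed by one five-element details tuple per tree, as described above.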
""" from __future__ import absolute_import import bisect import errno import operator import os from stat import S_IEXEC import stat import sys import time import zlib from bzrlib import ( cache_utf8, config, debug, errors, inventory, lock, osutils, static_tuple, trace, urlutils, ) # This is the Windows equivalent of ENOTDIR # It is defined in pywin32.winerror, but we don't want a strong dependency for # just an error code. ERROR_PATH_NOT_FOUND = 3 ERROR_DIRECTORY = 267 class SHA1Provider(object): """An interface for getting sha1s of a file.""" def sha1(self, abspath): """Return the sha1 of a file given its absolute path. :param abspath: May be a filesystem encoded absolute path or a unicode path. """ raise NotImplementedError(self.sha1) def stat_and_sha1(self, abspath): """Return the stat and sha1 of a file given its absolute path. :param abspath: May be a filesystem encoded absolute path or a unicode path. Note: the stat should be the stat of the physical file while the sha may be the sha of its canonical content. """ raise NotImplementedError(self.stat_and_sha1) class DefaultSHA1Provider(SHA1Provider): """A SHA1Provider that reads directly from the filesystem.""" def sha1(self, abspath): """Return the sha1 of a file given its absolute path.""" return osutils.sha_file_by_name(abspath) def stat_and_sha1(self, abspath): """Return the stat and sha1 of a file given its absolute path.""" file_obj = file(abspath, 'rb') try: statvalue = os.fstat(file_obj.fileno()) sha1 = osutils.sha_file(file_obj) finally: file_obj.close() return statvalue, sha1 class DirState(object): """Record directory and metadata state for fast access. A dirstate is a specialised data structure for managing local working tree state information. Its not yet well defined whether it is platform specific, and if it is how we detect/parameterize that. Dirstates use the usual lock_write, lock_read and unlock mechanisms. Unlike most bzr disk formats, DirStates must be locked for reading, using lock_read. (This is an os file lock internally.) This is necessary because the file can be rewritten in place. DirStates must be explicitly written with save() to commit changes; just unlocking them does not write the changes to disk. """ _kind_to_minikind = { 'absent': 'a', 'file': 'f', 'directory': 'd', 'relocated': 'r', 'symlink': 'l', 'tree-reference': 't', } _minikind_to_kind = { 'a': 'absent', 'f': 'file', 'd': 'directory', 'l':'symlink', 'r': 'relocated', 't': 'tree-reference', } _stat_to_minikind = { stat.S_IFDIR:'d', stat.S_IFREG:'f', stat.S_IFLNK:'l', } _to_yesno = {True:'y', False: 'n'} # TODO profile the performance gain # of using int conversion rather than a dict here. AND BLAME ANDREW IF # it is faster. # TODO: jam 20070221 Figure out what to do if we have a record that exceeds # the BISECT_PAGE_SIZE. For now, we just have to make it large enough # that we are sure a single record will always fit. BISECT_PAGE_SIZE = 4096 NOT_IN_MEMORY = 0 IN_MEMORY_UNMODIFIED = 1 IN_MEMORY_MODIFIED = 2 IN_MEMORY_HASH_MODIFIED = 3 # Only hash-cache updates # A pack_stat (the x's) that is just noise and will never match the output # of base64 encode. NULLSTAT = 'x' * 32 NULL_PARENT_DETAILS = static_tuple.StaticTuple('a', '', 0, False, '') HEADER_FORMAT_2 = '#bazaar dirstate flat format 2\n' HEADER_FORMAT_3 = '#bazaar dirstate flat format 3\n' def __init__(self, path, sha1_provider, worth_saving_limit=0): """Create a DirState object. :param path: The path at which the dirstate file on disk should live. 
:param sha1_provider: an object meeting the SHA1Provider interface. :param worth_saving_limit: when the exact number of hash changed entries is known, only bother saving the dirstate if more than this count of entries have changed. -1 means never save hash changes, 0 means always save hash changes. """ # _header_state and _dirblock_state represent the current state # of the dirstate metadata and the per-row data respectiely. # NOT_IN_MEMORY indicates that no data is in memory # IN_MEMORY_UNMODIFIED indicates that what we have in memory # is the same as is on disk # IN_MEMORY_MODIFIED indicates that we have a modified version # of what is on disk. # In future we will add more granularity, for instance _dirblock_state # will probably support partially-in-memory as a separate variable, # allowing for partially-in-memory unmodified and partially-in-memory # modified states. self._header_state = DirState.NOT_IN_MEMORY self._dirblock_state = DirState.NOT_IN_MEMORY # If true, an error has been detected while updating the dirstate, and # for safety we're not going to commit to disk. self._changes_aborted = False self._dirblocks = [] self._ghosts = [] self._parents = [] self._state_file = None self._filename = path self._lock_token = None self._lock_state = None self._id_index = None # a map from packed_stat to sha's. self._packed_stat_index = None self._end_of_header = None self._cutoff_time = None self._split_path_cache = {} self._bisect_page_size = DirState.BISECT_PAGE_SIZE self._sha1_provider = sha1_provider if 'hashcache' in debug.debug_flags: self._sha1_file = self._sha1_file_and_mutter else: self._sha1_file = self._sha1_provider.sha1 # These two attributes provide a simple cache for lookups into the # dirstate in-memory vectors. By probing respectively for the last # block, and for the next entry, we save nearly 2 bisections per path # during commit. self._last_block_index = None self._last_entry_index = None # The set of known hash changes self._known_hash_changes = set() # How many hash changed entries can we have without saving self._worth_saving_limit = worth_saving_limit self._config_stack = config.LocationStack(urlutils.local_path_to_url( path)) def __repr__(self): return "%s(%r)" % \ (self.__class__.__name__, self._filename) def _mark_modified(self, hash_changed_entries=None, header_modified=False): """Mark this dirstate as modified. :param hash_changed_entries: if non-None, mark just these entries as having their hash modified. :param header_modified: mark the header modified as well, not just the dirblocks. """ #trace.mutter_callsite(3, "modified hash entries: %s", hash_changed_entries) if hash_changed_entries: self._known_hash_changes.update([e[0] for e in hash_changed_entries]) if self._dirblock_state in (DirState.NOT_IN_MEMORY, DirState.IN_MEMORY_UNMODIFIED): # If the dirstate is already marked a IN_MEMORY_MODIFIED, then # that takes precedence. self._dirblock_state = DirState.IN_MEMORY_HASH_MODIFIED else: # TODO: Since we now have a IN_MEMORY_HASH_MODIFIED state, we # should fail noisily if someone tries to set # IN_MEMORY_MODIFIED but we don't have a write-lock! 
# We don't know exactly what changed so disable smart saving self._dirblock_state = DirState.IN_MEMORY_MODIFIED if header_modified: self._header_state = DirState.IN_MEMORY_MODIFIED def _mark_unmodified(self): """Mark this dirstate as unmodified.""" self._header_state = DirState.IN_MEMORY_UNMODIFIED self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED self._known_hash_changes = set() def add(self, path, file_id, kind, stat, fingerprint): """Add a path to be tracked. :param path: The path within the dirstate - '' is the root, 'foo' is the path foo within the root, 'foo/bar' is the path bar within foo within the root. :param file_id: The file id of the path being added. :param kind: The kind of the path, as a string like 'file', 'directory', etc. :param stat: The output of os.lstat for the path. :param fingerprint: The sha value of the file's canonical form (i.e. after any read filters have been applied), or the target of a symlink, or the referenced revision id for tree-references, or '' for directories. """ # adding a file: # find the block its in. # find the location in the block. # check its not there # add it. #------- copied from inventory.ensure_normalized_name - keep synced. # --- normalized_filename wants a unicode basename only, so get one. dirname, basename = osutils.split(path) # we dont import normalized_filename directly because we want to be # able to change the implementation at runtime for tests. norm_name, can_access = osutils.normalized_filename(basename) if norm_name != basename: if can_access: basename = norm_name else: raise errors.InvalidNormalization(path) # you should never have files called . or ..; just add the directory # in the parent, or according to the special treatment for the root if basename == '.' or basename == '..': raise errors.InvalidEntryName(path) # now that we've normalised, we need the correct utf8 path and # dirname and basename elements. This single encode and split should be # faster than three separate encodes. utf8path = (dirname + '/' + basename).strip('/').encode('utf8') dirname, basename = osutils.split(utf8path) # uses __class__ for speed; the check is needed for safety if file_id.__class__ is not str: raise AssertionError( "must be a utf8 file_id not %s" % (type(file_id), )) # Make sure the file_id does not exist in this tree rename_from = None file_id_entry = self._get_entry(0, fileid_utf8=file_id, include_deleted=True) if file_id_entry != (None, None): if file_id_entry[1][0][0] == 'a': if file_id_entry[0] != (dirname, basename, file_id): # set the old name's current operation to rename self.update_minimal(file_id_entry[0], 'r', path_utf8='', packed_stat='', fingerprint=utf8path ) rename_from = file_id_entry[0][0:2] else: path = osutils.pathjoin(file_id_entry[0][0], file_id_entry[0][1]) kind = DirState._minikind_to_kind[file_id_entry[1][0][0]] info = '%s:%s' % (kind, path) raise errors.DuplicateFileId(file_id, info) first_key = (dirname, basename, '') block_index, present = self._find_block_index_from_key(first_key) if present: # check the path is not in the tree block = self._dirblocks[block_index][1] entry_index, _ = self._find_entry_index(first_key, block) while (entry_index < len(block) and block[entry_index][0][0:2] == first_key[0:2]): if block[entry_index][1][0][0] not in 'ar': # this path is in the dirstate in the current tree. raise Exception, "adding already added path!" entry_index += 1 else: # The block where we want to put the file is not present. But it # might be because the directory was empty, or not loaded yet. 
Look # for a parent entry, if not found, raise NotVersionedError parent_dir, parent_base = osutils.split(dirname) parent_block_idx, parent_entry_idx, _, parent_present = \ self._get_block_entry_index(parent_dir, parent_base, 0) if not parent_present: raise errors.NotVersionedError(path, str(self)) self._ensure_block(parent_block_idx, parent_entry_idx, dirname) block = self._dirblocks[block_index][1] entry_key = (dirname, basename, file_id) if stat is None: size = 0 packed_stat = DirState.NULLSTAT else: size = stat.st_size packed_stat = pack_stat(stat) parent_info = self._empty_parent_info() minikind = DirState._kind_to_minikind[kind] if rename_from is not None: if rename_from[0]: old_path_utf8 = '%s/%s' % rename_from else: old_path_utf8 = rename_from[1] parent_info[0] = ('r', old_path_utf8, 0, False, '') if kind == 'file': entry_data = entry_key, [ (minikind, fingerprint, size, False, packed_stat), ] + parent_info elif kind == 'directory': entry_data = entry_key, [ (minikind, '', 0, False, packed_stat), ] + parent_info elif kind == 'symlink': entry_data = entry_key, [ (minikind, fingerprint, size, False, packed_stat), ] + parent_info elif kind == 'tree-reference': entry_data = entry_key, [ (minikind, fingerprint, 0, False, packed_stat), ] + parent_info else: raise errors.BzrError('unknown kind %r' % kind) entry_index, present = self._find_entry_index(entry_key, block) if not present: block.insert(entry_index, entry_data) else: if block[entry_index][1][0][0] != 'a': raise AssertionError(" %r(%r) already added" % (basename, file_id)) block[entry_index][1][0] = entry_data[1][0] if kind == 'directory': # insert a new dirblock self._ensure_block(block_index, entry_index, utf8path) self._mark_modified() if self._id_index: self._add_to_id_index(self._id_index, entry_key) def _bisect(self, paths): """Bisect through the disk structure for specific rows. :param paths: A list of paths to find :return: A dict mapping path => entries for found entries. Missing entries will not be in the map. The list is not sorted, and entries will be populated based on when they were read. """ self._requires_lock() # We need the file pointer to be right after the initial header block self._read_header_if_needed() # If _dirblock_state was in memory, we should just return info from # there, this function is only meant to handle when we want to read # part of the disk. if self._dirblock_state != DirState.NOT_IN_MEMORY: raise AssertionError("bad dirblock state %r" % self._dirblock_state) # The disk representation is generally info + '\0\n\0' at the end. But # for bisecting, it is easier to treat this as '\0' + info + '\0\n' # Because it means we can sync on the '\n' state_file = self._state_file file_size = os.fstat(state_file.fileno()).st_size # We end up with 2 extra fields, we should have a trailing '\n' to # ensure that we read the whole record, and we should have a precursur # '' which ensures that we start after the previous '\n' entry_field_count = self._fields_per_entry() + 1 low = self._end_of_header high = file_size - 1 # Ignore the final '\0' # Map from (dir, name) => entry found = {} # Avoid infinite seeking max_count = 30*len(paths) count = 0 # pending is a list of places to look. 
# each entry is a tuple of low, high, dir_names # low -> the first byte offset to read (inclusive) # high -> the last byte offset (inclusive) # dir_names -> The list of (dir, name) pairs that should be found in # the [low, high] range pending = [(low, high, paths)] page_size = self._bisect_page_size fields_to_entry = self._get_fields_to_entry() while pending: low, high, cur_files = pending.pop() if not cur_files or low >= high: # Nothing to find continue count += 1 if count > max_count: raise errors.BzrError('Too many seeks, most likely a bug.') mid = max(low, (low+high-page_size)/2) state_file.seek(mid) # limit the read size, so we don't end up reading data that we have # already read. read_size = min(page_size, (high-mid)+1) block = state_file.read(read_size) start = mid entries = block.split('\n') if len(entries) < 2: # We didn't find a '\n', so we cannot have found any records. # So put this range back and try again. But we know we have to # increase the page size, because a single read did not contain # a record break (so records must be larger than page_size) page_size *= 2 pending.append((low, high, cur_files)) continue # Check the first and last entries, in case they are partial, or if # we don't care about the rest of this page first_entry_num = 0 first_fields = entries[0].split('\0') if len(first_fields) < entry_field_count: # We didn't get the complete first entry # so move start, and grab the next, which # should be a full entry start += len(entries[0])+1 first_fields = entries[1].split('\0') first_entry_num = 1 if len(first_fields) <= 2: # We didn't even get a filename here... what do we do? # Try a large page size and repeat this query page_size *= 2 pending.append((low, high, cur_files)) continue else: # Find what entries we are looking for, which occur before and # after this first record. after = start if first_fields[1]: first_path = first_fields[1] + '/' + first_fields[2] else: first_path = first_fields[2] first_loc = _bisect_path_left(cur_files, first_path) # These exist before the current location pre = cur_files[:first_loc] # These occur after the current location, which may be in the # data we read, or might be after the last entry post = cur_files[first_loc:] if post and len(first_fields) >= entry_field_count: # We have files after the first entry # Parse the last entry last_entry_num = len(entries)-1 last_fields = entries[last_entry_num].split('\0') if len(last_fields) < entry_field_count: # The very last hunk was not complete, # read the previous hunk after = mid + len(block) - len(entries[-1]) last_entry_num -= 1 last_fields = entries[last_entry_num].split('\0') else: after = mid + len(block) if last_fields[1]: last_path = last_fields[1] + '/' + last_fields[2] else: last_path = last_fields[2] last_loc = _bisect_path_right(post, last_path) middle_files = post[:last_loc] post = post[last_loc:] if middle_files: # We have files that should occur in this block # (>= first, <= last) # Either we will find them here, or we can mark them as # missing. 
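# If the block's first or last path is one of the requested files, there
# may be more records for it outside the bytes just read, so it is queued
# again in pre/post below rather than being treated as fully handled.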
if middle_files[0] == first_path: # We might need to go before this location pre.append(first_path) if middle_files[-1] == last_path: post.insert(0, last_path) # Find out what paths we have paths = {first_path:[first_fields]} # last_path might == first_path so we need to be # careful if we should append rather than overwrite if last_entry_num != first_entry_num: paths.setdefault(last_path, []).append(last_fields) for num in xrange(first_entry_num+1, last_entry_num): # TODO: jam 20070223 We are already splitting here, so # shouldn't we just split the whole thing rather # than doing the split again in add_one_record? fields = entries[num].split('\0') if fields[1]: path = fields[1] + '/' + fields[2] else: path = fields[2] paths.setdefault(path, []).append(fields) for path in middle_files: for fields in paths.get(path, []): # offset by 1 because of the opening '\0' # consider changing fields_to_entry to avoid the # extra list slice entry = fields_to_entry(fields[1:]) found.setdefault(path, []).append(entry) # Now we have split up everything into pre, middle, and post, and # we have handled everything that fell in 'middle'. # We add 'post' first, so that we prefer to seek towards the # beginning, so that we will tend to go as early as we need, and # then only seek forward after that. if post: pending.append((after, high, post)) if pre: pending.append((low, start-1, pre)) # Consider that we may want to return the directory entries in sorted # order. For now, we just return them in whatever order we found them, # and leave it up to the caller if they care if it is ordered or not. return found def _bisect_dirblocks(self, dir_list): """Bisect through the disk structure to find entries in given dirs. _bisect_dirblocks is meant to find the contents of directories, which differs from _bisect, which only finds individual entries. :param dir_list: A sorted list of directory names ['', 'dir', 'foo']. :return: A map from dir => entries_for_dir """ # TODO: jam 20070223 A lot of the bisecting logic could be shared # between this and _bisect. It would require parameterizing the # inner loop with a function, though. We should evaluate the # performance difference. self._requires_lock() # We need the file pointer to be right after the initial header block self._read_header_if_needed() # If _dirblock_state was in memory, we should just return info from # there, this function is only meant to handle when we want to read # part of the disk. if self._dirblock_state != DirState.NOT_IN_MEMORY: raise AssertionError("bad dirblock state %r" % self._dirblock_state) # The disk representation is generally info + '\0\n\0' at the end. But # for bisecting, it is easier to treat this as '\0' + info + '\0\n' # Because it means we can sync on the '\n' state_file = self._state_file file_size = os.fstat(state_file.fileno()).st_size # We end up with 2 extra fields, we should have a trailing '\n' to # ensure that we read the whole record, and we should have a precursur # '' which ensures that we start after the previous '\n' entry_field_count = self._fields_per_entry() + 1 low = self._end_of_header high = file_size - 1 # Ignore the final '\0' # Map from dir => entry found = {} # Avoid infinite seeking max_count = 30*len(dir_list) count = 0 # pending is a list of places to look. 
# each entry is a tuple of low, high, dir_names # low -> the first byte offset to read (inclusive) # high -> the last byte offset (inclusive) # dirs -> The list of directories that should be found in # the [low, high] range pending = [(low, high, dir_list)] page_size = self._bisect_page_size fields_to_entry = self._get_fields_to_entry() while pending: low, high, cur_dirs = pending.pop() if not cur_dirs or low >= high: # Nothing to find continue count += 1 if count > max_count: raise errors.BzrError('Too many seeks, most likely a bug.') mid = max(low, (low+high-page_size)/2) state_file.seek(mid) # limit the read size, so we don't end up reading data that we have # already read. read_size = min(page_size, (high-mid)+1) block = state_file.read(read_size) start = mid entries = block.split('\n') if len(entries) < 2: # We didn't find a '\n', so we cannot have found any records. # So put this range back and try again. But we know we have to # increase the page size, because a single read did not contain # a record break (so records must be larger than page_size) page_size *= 2 pending.append((low, high, cur_dirs)) continue # Check the first and last entries, in case they are partial, or if # we don't care about the rest of this page first_entry_num = 0 first_fields = entries[0].split('\0') if len(first_fields) < entry_field_count: # We didn't get the complete first entry # so move start, and grab the next, which # should be a full entry start += len(entries[0])+1 first_fields = entries[1].split('\0') first_entry_num = 1 if len(first_fields) <= 1: # We didn't even get a dirname here... what do we do? # Try a large page size and repeat this query page_size *= 2 pending.append((low, high, cur_dirs)) continue else: # Find what entries we are looking for, which occur before and # after this first record. after = start first_dir = first_fields[1] first_loc = bisect.bisect_left(cur_dirs, first_dir) # These exist before the current location pre = cur_dirs[:first_loc] # These occur after the current location, which may be in the # data we read, or might be after the last entry post = cur_dirs[first_loc:] if post and len(first_fields) >= entry_field_count: # We have records to look at after the first entry # Parse the last entry last_entry_num = len(entries)-1 last_fields = entries[last_entry_num].split('\0') if len(last_fields) < entry_field_count: # The very last hunk was not complete, # read the previous hunk after = mid + len(block) - len(entries[-1]) last_entry_num -= 1 last_fields = entries[last_entry_num].split('\0') else: after = mid + len(block) last_dir = last_fields[1] last_loc = bisect.bisect_right(post, last_dir) middle_files = post[:last_loc] post = post[last_loc:] if middle_files: # We have files that should occur in this block # (>= first, <= last) # Either we will find them here, or we can mark them as # missing. if middle_files[0] == first_dir: # We might need to go before this location pre.append(first_dir) if middle_files[-1] == last_dir: post.insert(0, last_dir) # Find out what paths we have paths = {first_dir:[first_fields]} # last_dir might == first_dir so we need to be # careful if we should append rather than overwrite if last_entry_num != first_entry_num: paths.setdefault(last_dir, []).append(last_fields) for num in xrange(first_entry_num+1, last_entry_num): # TODO: jam 20070223 We are already splitting here, so # shouldn't we just split the whole thing rather # than doing the split again in add_one_record? 
fields = entries[num].split('\0') paths.setdefault(fields[1], []).append(fields) for cur_dir in middle_files: for fields in paths.get(cur_dir, []): # offset by 1 because of the opening '\0' # consider changing fields_to_entry to avoid the # extra list slice entry = fields_to_entry(fields[1:]) found.setdefault(cur_dir, []).append(entry) # Now we have split up everything into pre, middle, and post, and # we have handled everything that fell in 'middle'. # We add 'post' first, so that we prefer to seek towards the # beginning, so that we will tend to go as early as we need, and # then only seek forward after that. if post: pending.append((after, high, post)) if pre: pending.append((low, start-1, pre)) return found def _bisect_recursive(self, paths): """Bisect for entries for all paths and their children. This will use bisect to find all records for the supplied paths. It will then continue to bisect for any records which are marked as directories. (and renames?) :param paths: A sorted list of (dir, name) pairs eg: [('', 'a'), ('', 'f'), ('a/b', 'c')] :return: A dictionary mapping (dir, name, file_id) => [tree_info] """ # Map from (dir, name, file_id) => [tree_info] found = {} found_dir_names = set() # Directories that have been read processed_dirs = set() # Get the ball rolling with the first bisect for all entries. newly_found = self._bisect(paths) while newly_found: # Directories that need to be read pending_dirs = set() paths_to_search = set() for entry_list in newly_found.itervalues(): for dir_name_id, trees_info in entry_list: found[dir_name_id] = trees_info found_dir_names.add(dir_name_id[:2]) is_dir = False for tree_info in trees_info: minikind = tree_info[0] if minikind == 'd': if is_dir: # We already processed this one as a directory, # we don't need to do the extra work again. continue subdir, name, file_id = dir_name_id path = osutils.pathjoin(subdir, name) is_dir = True if path not in processed_dirs: pending_dirs.add(path) elif minikind == 'r': # Rename, we need to directly search the target # which is contained in the fingerprint column dir_name = osutils.split(tree_info[1]) if dir_name[0] in pending_dirs: # This entry will be found in the dir search continue if dir_name not in found_dir_names: paths_to_search.add(tree_info[1]) # Now we have a list of paths to look for directly, and # directory blocks that need to be read. # newly_found is mixing the keys between (dir, name) and path # entries, but that is okay, because we only really care about the # targets. newly_found = self._bisect(sorted(paths_to_search)) newly_found.update(self._bisect_dirblocks(sorted(pending_dirs))) processed_dirs.update(pending_dirs) return found def _discard_merge_parents(self): """Discard any parents trees beyond the first. Note that if this fails the dirstate is corrupted. After this function returns the dirstate contains 2 trees, neither of which are ghosted. """ self._read_header_if_needed() parents = self.get_parent_ids() if len(parents) < 1: return # only require all dirblocks if we are doing a full-pass removal. 
self._read_dirblocks_if_needed() dead_patterns = set([('a', 'r'), ('a', 'a'), ('r', 'r'), ('r', 'a')]) def iter_entries_removable(): for block in self._dirblocks: deleted_positions = [] for pos, entry in enumerate(block[1]): yield entry if (entry[1][0][0], entry[1][1][0]) in dead_patterns: deleted_positions.append(pos) if deleted_positions: if len(deleted_positions) == len(block[1]): del block[1][:] else: for pos in reversed(deleted_positions): del block[1][pos] # if the first parent is a ghost: if parents[0] in self.get_ghosts(): empty_parent = [DirState.NULL_PARENT_DETAILS] for entry in iter_entries_removable(): entry[1][1:] = empty_parent else: for entry in iter_entries_removable(): del entry[1][2:] self._ghosts = [] self._parents = [parents[0]] self._mark_modified(header_modified=True) def _empty_parent_info(self): return [DirState.NULL_PARENT_DETAILS] * (len(self._parents) - len(self._ghosts)) def _ensure_block(self, parent_block_index, parent_row_index, dirname): """Ensure a block for dirname exists. This function exists to let callers which know that there is a directory dirname ensure that the block for it exists. This block can fail to exist because of demand loading, or because a directory had no children. In either case it is not an error. It is however an error to call this if there is no parent entry for the directory, and thus the function requires the coordinates of such an entry to be provided. The root row is special cased and can be indicated with a parent block and row index of -1 :param parent_block_index: The index of the block in which dirname's row exists. :param parent_row_index: The index in the parent block where the row exists. :param dirname: The utf8 dirname to ensure there is a block for. :return: The index for the block. """ if dirname == '' and parent_row_index == 0 and parent_block_index == 0: # This is the signature of the root row, and the # contents-of-root row is always index 1 return 1 # the basename of the directory must be the end of its full name. if not (parent_block_index == -1 and parent_block_index == -1 and dirname == ''): if not dirname.endswith( self._dirblocks[parent_block_index][1][parent_row_index][0][1]): raise AssertionError("bad dirname %r" % dirname) block_index, present = self._find_block_index_from_key((dirname, '', '')) if not present: ## In future, when doing partial parsing, this should load and # populate the entire block. self._dirblocks.insert(block_index, (dirname, [])) return block_index def _entries_to_current_state(self, new_entries): """Load new_entries into self.dirblocks. Process new_entries into the current state object, making them the active state. The entries are grouped together by directory to form dirblocks. :param new_entries: A sorted list of entries. This function does not sort to prevent unneeded overhead when callers have a sorted list already. :return: Nothing. """ if new_entries[0][0][0:2] != ('', ''): raise AssertionError( "Missing root row %r" % (new_entries[0][0],)) # The two blocks here are deliberate: the root block and the # contents-of-root block. 
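# (both blocks are keyed by ''; after the loop below has filled them,
# _split_root_dirblock_into_contents separates the root entry from the
# contents-of-root entries again at the end of this method)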
self._dirblocks = [('', []), ('', [])] current_block = self._dirblocks[0][1] current_dirname = '' root_key = ('', '') append_entry = current_block.append for entry in new_entries: if entry[0][0] != current_dirname: # new block - different dirname current_block = [] current_dirname = entry[0][0] self._dirblocks.append((current_dirname, current_block)) append_entry = current_block.append # append the entry to the current block append_entry(entry) self._split_root_dirblock_into_contents() def _split_root_dirblock_into_contents(self): """Split the root dirblocks into root and contents-of-root. After parsing by path, we end up with root entries and contents-of-root entries in the same block. This loop splits them out again. """ # The above loop leaves the "root block" entries mixed with the # "contents-of-root block". But we don't want an if check on # all entries, so instead we just fix it up here. if self._dirblocks[1] != ('', []): raise ValueError("bad dirblock start %r" % (self._dirblocks[1],)) root_block = [] contents_of_root_block = [] for entry in self._dirblocks[0][1]: if not entry[0][1]: # This is a root entry root_block.append(entry) else: contents_of_root_block.append(entry) self._dirblocks[0] = ('', root_block) self._dirblocks[1] = ('', contents_of_root_block) def _entries_for_path(self, path): """Return a list with all the entries that match path for all ids.""" dirname, basename = os.path.split(path) key = (dirname, basename, '') block_index, present = self._find_block_index_from_key(key) if not present: # the block which should contain path is absent. return [] result = [] block = self._dirblocks[block_index][1] entry_index, _ = self._find_entry_index(key, block) # we may need to look at multiple entries at this path: walk while the specific_files match. while (entry_index < len(block) and block[entry_index][0][0:2] == key[0:2]): result.append(block[entry_index]) entry_index += 1 return result def _entry_to_line(self, entry): """Serialize entry to a NULL delimited line ready for _get_output_lines. :param entry: An entry_tuple as defined in the module docstring. """ entire_entry = list(entry[0]) for tree_number, tree_data in enumerate(entry[1]): # (minikind, fingerprint, size, executable, tree_specific_string) entire_entry.extend(tree_data) # 3 for the key, 5 for the fields per tree. tree_offset = 3 + tree_number * 5 # minikind entire_entry[tree_offset + 0] = tree_data[0] # size entire_entry[tree_offset + 2] = str(tree_data[2]) # executable entire_entry[tree_offset + 3] = DirState._to_yesno[tree_data[3]] return '\0'.join(entire_entry) def _fields_per_entry(self): """How many null separated fields should be in each entry row. Each line now has an extra '\\n' field which is not used so we just skip over it entry size:: 3 fields for the key + number of fields per tree_data (5) * tree count + newline """ tree_count = 1 + self._num_present_parents() return 3 + 5 * tree_count + 1 def _find_block(self, key, add_if_missing=False): """Return the block that key should be present in. :param key: A dirstate entry key. :return: The block tuple. """ block_index, present = self._find_block_index_from_key(key) if not present: if not add_if_missing: # check to see if key is versioned itself - we might want to # add it anyway, because dirs with no entries dont get a # dirblock at parse time. # This is an uncommon branch to take: most dirs have children, # and most code works with versioned paths. 
parent_base, parent_name = osutils.split(key[0]) if not self._get_block_entry_index(parent_base, parent_name, 0)[3]: # some parent path has not been added - its an error to add # this child raise errors.NotVersionedError(key[0:2], str(self)) self._dirblocks.insert(block_index, (key[0], [])) return self._dirblocks[block_index] def _find_block_index_from_key(self, key): """Find the dirblock index for a key. :return: The block index, True if the block for the key is present. """ if key[0:2] == ('', ''): return 0, True try: if (self._last_block_index is not None and self._dirblocks[self._last_block_index][0] == key[0]): return self._last_block_index, True except IndexError: pass block_index = bisect_dirblock(self._dirblocks, key[0], 1, cache=self._split_path_cache) # _right returns one-past-where-key is so we have to subtract # one to use it. we use _right here because there are two # '' blocks - the root, and the contents of root # we always have a minimum of 2 in self._dirblocks: root and # root-contents, and for '', we get 2 back, so this is # simple and correct: present = (block_index < len(self._dirblocks) and self._dirblocks[block_index][0] == key[0]) self._last_block_index = block_index # Reset the entry index cache to the beginning of the block. self._last_entry_index = -1 return block_index, present def _find_entry_index(self, key, block): """Find the entry index for a key in a block. :return: The entry index, True if the entry for the key is present. """ len_block = len(block) try: if self._last_entry_index is not None: # mini-bisect here. entry_index = self._last_entry_index + 1 # A hit is when the key is after the last slot, and before or # equal to the next slot. if ((entry_index > 0 and block[entry_index - 1][0] < key) and key <= block[entry_index][0]): self._last_entry_index = entry_index present = (block[entry_index][0] == key) return entry_index, present except IndexError: pass entry_index = bisect.bisect_left(block, (key, [])) present = (entry_index < len_block and block[entry_index][0] == key) self._last_entry_index = entry_index return entry_index, present @staticmethod def from_tree(tree, dir_state_filename, sha1_provider=None): """Create a dirstate from a bzr Tree. :param tree: The tree which should provide parent information and inventory ids. :param sha1_provider: an object meeting the SHA1Provider interface. If None, a DefaultSHA1Provider is used. :return: a DirState object which is currently locked for writing. 
        (it was locked by DirState.initialize)
        """
        result = DirState.initialize(dir_state_filename,
            sha1_provider=sha1_provider)
        try:
            tree.lock_read()
            try:
                parent_ids = tree.get_parent_ids()
                num_parents = len(parent_ids)
                parent_trees = []
                for parent_id in parent_ids:
                    parent_tree = tree.branch.repository.revision_tree(parent_id)
                    parent_trees.append((parent_id, parent_tree))
                    parent_tree.lock_read()
                result.set_parent_trees(parent_trees, [])
                result.set_state_from_inventory(tree.root_inventory)
            finally:
                for revid, parent_tree in parent_trees:
                    parent_tree.unlock()
                tree.unlock()
        except:
            # The caller won't have a chance to unlock this, so make sure we
            # clean up ourselves.
            result.unlock()
            raise
        return result

    def _check_delta_is_valid(self, delta):
        return list(inventory._check_delta_unique_ids(
                    inventory._check_delta_unique_old_paths(
                    inventory._check_delta_unique_new_paths(
                    inventory._check_delta_ids_match_entry(
                    inventory._check_delta_ids_are_valid(
                    inventory._check_delta_new_path_entry_both_or_None(delta)))))))

    def update_by_delta(self, delta):
        """Apply an inventory delta to the dirstate for tree 0.

        This is the workhorse for apply_inventory_delta in dirstate-based
        trees.

        :param delta: An inventory delta.  See Inventory.apply_delta for
            details.
        """
        self._read_dirblocks_if_needed()
        encode = cache_utf8.encode
        insertions = {}
        removals = {}
        # Accumulate parent references (path_utf8, id), to check for parentless
        # items or items placed under files/links/tree-references. We get
        # references from every item in the delta that is not a deletion and
        # is not itself the root.
        parents = set()
        # Added ids must not be in the dirstate already. This set holds those
        # ids.
        new_ids = set()
        # This loop transforms the delta to single atomic operations that can
        # be executed and validated.
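        # Illustrative sketch, not from the original source (paths and ids are
        # hypothetical): each delta item is (old_path, new_path, file_id,
        # inv_entry), so
        #   (None, 'b.txt', 'b-id', <entry>)     is an add,
        #   ('a.txt', None, 'a-id', None)        is a delete, and
        #   ('a.txt', 'c.txt', 'a-id', <entry>)  is a rename, which the loop
        # below records as a removal of 'a.txt' plus an insertion at 'c.txt'.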
delta = sorted(self._check_delta_is_valid(delta), reverse=True) for old_path, new_path, file_id, inv_entry in delta: if (file_id in insertions) or (file_id in removals): self._raise_invalid(old_path or new_path, file_id, "repeated file_id") if old_path is not None: old_path = old_path.encode('utf-8') removals[file_id] = old_path else: new_ids.add(file_id) if new_path is not None: if inv_entry is None: self._raise_invalid(new_path, file_id, "new_path with no entry") new_path = new_path.encode('utf-8') dirname_utf8, basename = osutils.split(new_path) if basename: parents.add((dirname_utf8, inv_entry.parent_id)) key = (dirname_utf8, basename, file_id) minikind = DirState._kind_to_minikind[inv_entry.kind] if minikind == 't': fingerprint = inv_entry.reference_revision or '' else: fingerprint = '' insertions[file_id] = (key, minikind, inv_entry.executable, fingerprint, new_path) # Transform moves into delete+add pairs if None not in (old_path, new_path): for child in self._iter_child_entries(0, old_path): if child[0][2] in insertions or child[0][2] in removals: continue child_dirname = child[0][0] child_basename = child[0][1] minikind = child[1][0][0] fingerprint = child[1][0][4] executable = child[1][0][3] old_child_path = osutils.pathjoin(child_dirname, child_basename) removals[child[0][2]] = old_child_path child_suffix = child_dirname[len(old_path):] new_child_dirname = (new_path + child_suffix) key = (new_child_dirname, child_basename, child[0][2]) new_child_path = osutils.pathjoin(new_child_dirname, child_basename) insertions[child[0][2]] = (key, minikind, executable, fingerprint, new_child_path) self._check_delta_ids_absent(new_ids, delta, 0) try: self._apply_removals(removals.iteritems()) self._apply_insertions(insertions.values()) # Validate parents self._after_delta_check_parents(parents, 0) except errors.BzrError, e: self._changes_aborted = True if 'integrity error' not in str(e): raise # _get_entry raises BzrError when a request is inconsistent; we # want such errors to be shown as InconsistentDelta - and that # fits the behaviour we trigger. raise errors.InconsistentDeltaDelta(delta, "error from _get_entry. %s" % (e,)) def _apply_removals(self, removals): for file_id, path in sorted(removals, reverse=True, key=operator.itemgetter(1)): dirname, basename = osutils.split(path) block_i, entry_i, d_present, f_present = \ self._get_block_entry_index(dirname, basename, 0) try: entry = self._dirblocks[block_i][1][entry_i] except IndexError: self._raise_invalid(path, file_id, "Wrong path for old path.") if not f_present or entry[1][0][0] in 'ar': self._raise_invalid(path, file_id, "Wrong path for old path.") if file_id != entry[0][2]: self._raise_invalid(path, file_id, "Attempt to remove path has wrong id - found %r." % entry[0][2]) self._make_absent(entry) # See if we have a malformed delta: deleting a directory must not # leave crud behind. This increases the number of bisects needed # substantially, but deletion or renames of large numbers of paths # is rare enough it shouldn't be an issue (famous last words?) RBC # 20080730. block_i, entry_i, d_present, f_present = \ self._get_block_entry_index(path, '', 0) if d_present: # The dir block is still present in the dirstate; this could # be due to it being in a parent tree, or a corrupt delta. 
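                # Illustrative note, not from the original source (paths are
                # hypothetical): removals were applied above in reverse path
                # order, so 'dir/sub/file' was made absent before 'dir'; any
                # child below that is still neither 'a' (absent) nor 'r'
                # (relocated) is something the delta failed to delete.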
                for child_entry in self._dirblocks[block_i][1]:
                    if child_entry[1][0][0] not in ('r', 'a'):
                        self._raise_invalid(path, entry[0][2],
                            "The file id was deleted but its children were "
                            "not deleted.")

    def _apply_insertions(self, adds):
        try:
            for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
                self.update_minimal(key, minikind, executable, fingerprint,
                                    path_utf8=path_utf8)
        except errors.NotVersionedError:
            self._raise_invalid(path_utf8.decode('utf8'), key[2],
                "Missing parent")

    def update_basis_by_delta(self, delta, new_revid):
        """Update the parents of this tree after a commit.

        This gives the tree one parent, with revision id new_revid. The
        inventory delta is applied to the current basis tree to generate the
        inventory for the parent new_revid, and all other parent trees are
        discarded.

        Note that an exception during the operation of this method will leave
        the dirstate in a corrupt state where it should not be saved.

        :param new_revid: The new revision id for the tree's parent.
        :param delta: An inventory delta (see apply_inventory_delta) describing
            the changes from the current leftmost parent revision to new_revid.
        """
        self._read_dirblocks_if_needed()
        self._discard_merge_parents()
        if self._ghosts != []:
            raise NotImplementedError(self.update_basis_by_delta)
        if len(self._parents) == 0:
            # set up a blank tree, the simplest way.
            empty_parent = DirState.NULL_PARENT_DETAILS
            for entry in self._iter_entries():
                entry[1].append(empty_parent)
            self._parents.append(new_revid)
        self._parents[0] = new_revid
        delta = sorted(self._check_delta_is_valid(delta), reverse=True)
        adds = []
        changes = []
        deletes = []
        # The paths this function accepts are unicode and must be encoded as we
        # go.
        encode = cache_utf8.encode
        inv_to_entry = self._inv_entry_to_details
        # delta is now (deletes, changes), (adds) in reverse lexicographical
        # order.
        # deletes in reverse lexicographic order are safe to process in situ.
        # renames are not, as a rename from any path could go to a path
        # lexicographically lower, so we transform renames into delete, add
        # pairs, expanding them recursively as needed.
        # At the same time, to reduce interface friction we convert the input
        # inventory entries to dirstate.
        root_only = ('', '')
        # Accumulate parent references (path_utf8, id), to check for parentless
        # items or items placed under files/links/tree-references. We get
        # references from every item in the delta that is not a deletion and
        # is not itself the root.
        parents = set()
        # Added ids must not be in the dirstate already. This set holds those
        # ids.
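        # Illustrative sketch, not from the original source (paths and ids are
        # hypothetical): a rename delta item ('olddir', 'newdir', 'dir-id',
        # entry) is expanded by the loop below into
        #   adds.append(('olddir', 'newdir', 'dir-id', details, False))
        #   deletes.append(('olddir', 'newdir', 'dir-id', None, False))
        # and every child of 'olddir' is queued the same way, so the rename is
        # replayed as delete+add pairs rather than as in-place moves.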
new_ids = set() for old_path, new_path, file_id, inv_entry in delta: if inv_entry is not None and file_id != inv_entry.file_id: self._raise_invalid(new_path, file_id, "mismatched entry file_id %r" % inv_entry) if new_path is None: new_path_utf8 = None else: if inv_entry is None: self._raise_invalid(new_path, file_id, "new_path with no entry") new_path_utf8 = encode(new_path) # note the parent for validation dirname_utf8, basename_utf8 = osutils.split(new_path_utf8) if basename_utf8: parents.add((dirname_utf8, inv_entry.parent_id)) if old_path is None: old_path_utf8 = None else: old_path_utf8 = encode(old_path) if old_path is None: adds.append((None, new_path_utf8, file_id, inv_to_entry(inv_entry), True)) new_ids.add(file_id) elif new_path is None: deletes.append((old_path_utf8, None, file_id, None, True)) elif (old_path, new_path) == root_only: # change things in-place # Note: the case of a parent directory changing its file_id # tends to break optimizations here, because officially # the file has actually been moved, it just happens to # end up at the same path. If we can figure out how to # handle that case, we can avoid a lot of add+delete # pairs for objects that stay put. # elif old_path == new_path: changes.append((old_path_utf8, new_path_utf8, file_id, inv_to_entry(inv_entry))) else: # Renames: # Because renames must preserve their children we must have # processed all relocations and removes before hand. The sort # order ensures we've examined the child paths, but we also # have to execute the removals, or the split to an add/delete # pair will result in the deleted item being reinserted, or # renamed items being reinserted twice - and possibly at the # wrong place. Splitting into a delete/add pair also simplifies # the handling of entries with ('f', ...), ('r' ...) because # the target of the 'r' is old_path here, and we add that to # deletes, meaning that the add handler does not need to check # for 'r' items on every pass. self._update_basis_apply_deletes(deletes) deletes = [] # Split into an add/delete pair recursively. adds.append((old_path_utf8, new_path_utf8, file_id, inv_to_entry(inv_entry), False)) # Expunge deletes that we've seen so that deleted/renamed # children of a rename directory are handled correctly. new_deletes = reversed(list( self._iter_child_entries(1, old_path_utf8))) # Remove the current contents of the tree at orig_path, and # reinsert at the correct new path. for entry in new_deletes: child_dirname, child_basename, child_file_id = entry[0] if child_dirname: source_path = child_dirname + '/' + child_basename else: source_path = child_basename if new_path_utf8: target_path = \ new_path_utf8 + source_path[len(old_path_utf8):] else: if old_path_utf8 == '': raise AssertionError("cannot rename directory to" " itself") target_path = source_path[len(old_path_utf8) + 1:] adds.append((None, target_path, entry[0][2], entry[1][1], False)) deletes.append( (source_path, target_path, entry[0][2], None, False)) deletes.append( (old_path_utf8, new_path_utf8, file_id, None, False)) self._check_delta_ids_absent(new_ids, delta, 1) try: # Finish expunging deletes/first half of renames. self._update_basis_apply_deletes(deletes) # Reinstate second half of renames and new paths. self._update_basis_apply_adds(adds) # Apply in-situ changes. 
self._update_basis_apply_changes(changes) # Validate parents self._after_delta_check_parents(parents, 1) except errors.BzrError, e: self._changes_aborted = True if 'integrity error' not in str(e): raise # _get_entry raises BzrError when a request is inconsistent; we # want such errors to be shown as InconsistentDelta - and that # fits the behaviour we trigger. raise errors.InconsistentDeltaDelta(delta, "error from _get_entry. %s" % (e,)) self._mark_modified(header_modified=True) self._id_index = None return def _check_delta_ids_absent(self, new_ids, delta, tree_index): """Check that none of the file_ids in new_ids are present in a tree.""" if not new_ids: return id_index = self._get_id_index() for file_id in new_ids: for key in id_index.get(file_id, ()): block_i, entry_i, d_present, f_present = \ self._get_block_entry_index(key[0], key[1], tree_index) if not f_present: # In a different tree continue entry = self._dirblocks[block_i][1][entry_i] if entry[0][2] != file_id: # Different file_id, so not what we want. continue self._raise_invalid(("%s/%s" % key[0:2]).decode('utf8'), file_id, "This file_id is new in the delta but already present in " "the target") def _raise_invalid(self, path, file_id, reason): self._changes_aborted = True raise errors.InconsistentDelta(path, file_id, reason) def _update_basis_apply_adds(self, adds): """Apply a sequence of adds to tree 1 during update_basis_by_delta. They may be adds, or renames that have been split into add/delete pairs. :param adds: A sequence of adds. Each add is a tuple: (None, new_path_utf8, file_id, (entry_details), real_add). real_add is False when the add is the second half of a remove-and-reinsert pair created to handle renames and deletes. """ # Adds are accumulated partly from renames, so can be in any input # order - sort it. # TODO: we may want to sort in dirblocks order. That way each entry # will end up in the same directory, allowing the _get_entry # fast-path for looking up 2 items in the same dir work. adds.sort(key=lambda x: x[1]) # adds is now in lexographic order, which places all parents before # their children, so we can process it linearly. absent = 'ar' st = static_tuple.StaticTuple for old_path, new_path, file_id, new_details, real_add in adds: dirname, basename = osutils.split(new_path) entry_key = st(dirname, basename, file_id) block_index, present = self._find_block_index_from_key(entry_key) if not present: # The block where we want to put the file is not present. # However, it might have just been an empty directory. Look for # the parent in the basis-so-far before throwing an error. parent_dir, parent_base = osutils.split(dirname) parent_block_idx, parent_entry_idx, _, parent_present = \ self._get_block_entry_index(parent_dir, parent_base, 1) if not parent_present: self._raise_invalid(new_path, file_id, "Unable to find block for this record." " Was the parent added?") self._ensure_block(parent_block_idx, parent_entry_idx, dirname) block = self._dirblocks[block_index][1] entry_index, present = self._find_entry_index(entry_key, block) if real_add: if old_path is not None: self._raise_invalid(new_path, file_id, 'considered a real add but still had old_path at %s' % (old_path,)) if present: entry = block[entry_index] basis_kind = entry[1][1][0] if basis_kind == 'a': entry[1][1] = new_details elif basis_kind == 'r': raise NotImplementedError() else: self._raise_invalid(new_path, file_id, "An entry was marked as a new add" " but the basis target already existed") else: # The exact key was not found in the block. 
However, we need to # check if there is a key next to us that would have matched. # We only need to check 2 locations, because there are only 2 # trees present. for maybe_index in range(entry_index-1, entry_index+1): if maybe_index < 0 or maybe_index >= len(block): continue maybe_entry = block[maybe_index] if maybe_entry[0][:2] != (dirname, basename): # Just a random neighbor continue if maybe_entry[0][2] == file_id: raise AssertionError( '_find_entry_index didnt find a key match' ' but walking the data did, for %s' % (entry_key,)) basis_kind = maybe_entry[1][1][0] if basis_kind not in 'ar': self._raise_invalid(new_path, file_id, "we have an add record for path, but the path" " is already present with another file_id %s" % (maybe_entry[0][2],)) entry = (entry_key, [DirState.NULL_PARENT_DETAILS, new_details]) block.insert(entry_index, entry) active_kind = entry[1][0][0] if active_kind == 'a': # The active record shows up as absent, this could be genuine, # or it could be present at some other location. We need to # verify. id_index = self._get_id_index() # The id_index may not be perfectly accurate for tree1, because # we haven't been keeping it updated. However, it should be # fine for tree0, and that gives us enough info for what we # need keys = id_index.get(file_id, ()) for key in keys: block_i, entry_i, d_present, f_present = \ self._get_block_entry_index(key[0], key[1], 0) if not f_present: continue active_entry = self._dirblocks[block_i][1][entry_i] if (active_entry[0][2] != file_id): # Some other file is at this path, we don't need to # link it. continue real_active_kind = active_entry[1][0][0] if real_active_kind in 'ar': # We found a record, which was not *this* record, # which matches the file_id, but is not actually # present. Something seems *really* wrong. self._raise_invalid(new_path, file_id, "We found a tree0 entry that doesnt make sense") # Now, we've found a tree0 entry which matches the file_id # but is at a different location. So update them to be # rename records. active_dir, active_name = active_entry[0][:2] if active_dir: active_path = active_dir + '/' + active_name else: active_path = active_name active_entry[1][1] = st('r', new_path, 0, False, '') entry[1][0] = st('r', active_path, 0, False, '') elif active_kind == 'r': raise NotImplementedError() new_kind = new_details[0] if new_kind == 'd': self._ensure_block(block_index, entry_index, new_path) def _update_basis_apply_changes(self, changes): """Apply a sequence of changes to tree 1 during update_basis_by_delta. :param adds: A sequence of changes. Each change is a tuple: (path_utf8, path_utf8, file_id, (entry_details)) """ absent = 'ar' for old_path, new_path, file_id, new_details in changes: # the entry for this file_id must be in tree 0. entry = self._get_entry(1, file_id, new_path) if entry[0] is None or entry[1][1][0] in 'ar': self._raise_invalid(new_path, file_id, 'changed entry considered not present') entry[1][1] = new_details def _update_basis_apply_deletes(self, deletes): """Apply a sequence of deletes to tree 1 during update_basis_by_delta. They may be deletes, or renames that have been split into add/delete pairs. :param deletes: A sequence of deletes. Each delete is a tuple: (old_path_utf8, new_path_utf8, file_id, None, real_delete). real_delete is True when the desired outcome is an actual deletion rather than the rename handling logic temporarily deleting a path during the replacement of a parent. 
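        Illustrative examples (paths and ids are hypothetical, not from the
        original source): a real delete arrives as
        ('gone.txt', None, 'gone-id', None, True), while the delete half of a
        rename arrives as ('old.txt', 'new.txt', 'moved-id', None, False).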
""" null = DirState.NULL_PARENT_DETAILS for old_path, new_path, file_id, _, real_delete in deletes: if real_delete != (new_path is None): self._raise_invalid(old_path, file_id, "bad delete delta") # the entry for this file_id must be in tree 1. dirname, basename = osutils.split(old_path) block_index, entry_index, dir_present, file_present = \ self._get_block_entry_index(dirname, basename, 1) if not file_present: self._raise_invalid(old_path, file_id, 'basis tree does not contain removed entry') entry = self._dirblocks[block_index][1][entry_index] # The state of the entry in the 'active' WT active_kind = entry[1][0][0] if entry[0][2] != file_id: self._raise_invalid(old_path, file_id, 'mismatched file_id in tree 1') dir_block = () old_kind = entry[1][1][0] if active_kind in 'ar': # The active tree doesn't have this file_id. # The basis tree is changing this record. If this is a # rename, then we don't want the record here at all # anymore. If it is just an in-place change, we want the # record here, but we'll add it if we need to. So we just # delete it if active_kind == 'r': active_path = entry[1][0][1] active_entry = self._get_entry(0, file_id, active_path) if active_entry[1][1][0] != 'r': self._raise_invalid(old_path, file_id, "Dirstate did not have matching rename entries") elif active_entry[1][0][0] in 'ar': self._raise_invalid(old_path, file_id, "Dirstate had a rename pointing at an inactive" " tree0") active_entry[1][1] = null del self._dirblocks[block_index][1][entry_index] if old_kind == 'd': # This was a directory, and the active tree says it # doesn't exist, and now the basis tree says it doesn't # exist. Remove its dirblock if present (dir_block_index, present) = self._find_block_index_from_key( (old_path, '', '')) if present: dir_block = self._dirblocks[dir_block_index][1] if not dir_block: # This entry is empty, go ahead and just remove it del self._dirblocks[dir_block_index] else: # There is still an active record, so just mark this # removed. entry[1][1] = null block_i, entry_i, d_present, f_present = \ self._get_block_entry_index(old_path, '', 1) if d_present: dir_block = self._dirblocks[block_i][1] for child_entry in dir_block: child_basis_kind = child_entry[1][1][0] if child_basis_kind not in 'ar': self._raise_invalid(old_path, file_id, "The file id was deleted but its children were " "not deleted.") def _after_delta_check_parents(self, parents, index): """Check that parents required by the delta are all intact. :param parents: An iterable of (path_utf8, file_id) tuples which are required to be present in tree 'index' at path_utf8 with id file_id and be a directory. :param index: The column in the dirstate to check for parents in. """ for dirname_utf8, file_id in parents: # Get the entry - the ensures that file_id, dirname_utf8 exists and # has the right file id. entry = self._get_entry(index, file_id, dirname_utf8) if entry[1] is None: self._raise_invalid(dirname_utf8.decode('utf8'), file_id, "This parent is not present.") # Parents of things must be directories if entry[1][index][0] != 'd': self._raise_invalid(dirname_utf8.decode('utf8'), file_id, "This parent is not a directory.") def _observed_sha1(self, entry, sha1, stat_value, _stat_to_minikind=_stat_to_minikind): """Note the sha1 of a file. :param entry: The entry the sha1 is for. :param sha1: The observed sha1. :param stat_value: The os.lstat for the file. 
""" try: minikind = _stat_to_minikind[stat_value.st_mode & 0170000] except KeyError: # Unhandled kind return None if minikind == 'f': if self._cutoff_time is None: self._sha_cutoff_time() if (stat_value.st_mtime < self._cutoff_time and stat_value.st_ctime < self._cutoff_time): entry[1][0] = ('f', sha1, stat_value.st_size, entry[1][0][3], pack_stat(stat_value)) self._mark_modified([entry]) def _sha_cutoff_time(self): """Return cutoff time. Files modified more recently than this time are at risk of being undetectably modified and so can't be cached. """ # Cache the cutoff time as long as we hold a lock. # time.time() isn't super expensive (approx 3.38us), but # when you call it 50,000 times it adds up. # For comparison, os.lstat() costs 7.2us if it is hot. self._cutoff_time = int(time.time()) - 3 return self._cutoff_time def _lstat(self, abspath, entry): """Return the os.lstat value for this path.""" return os.lstat(abspath) def _sha1_file_and_mutter(self, abspath): # when -Dhashcache is turned on, this is monkey-patched in to log # file reads trace.mutter("dirstate sha1 " + abspath) return self._sha1_provider.sha1(abspath) def _is_executable(self, mode, old_executable): """Is this file executable?""" return bool(S_IEXEC & mode) def _is_executable_win32(self, mode, old_executable): """On win32 the executable bit is stored in the dirstate.""" return old_executable if sys.platform == 'win32': _is_executable = _is_executable_win32 def _read_link(self, abspath, old_link): """Read the target of a symlink""" # TODO: jam 200700301 On Win32, this could just return the value # already in memory. However, this really needs to be done at a # higher level, because there either won't be anything on disk, # or the thing on disk will be a file. fs_encoding = osutils._fs_enc if isinstance(abspath, unicode): # abspath is defined as the path to pass to lstat. readlink is # buggy in python < 2.6 (it doesn't encode unicode path into FS # encoding), so we need to encode ourselves knowing that unicode # paths are produced by UnicodeDirReader on purpose. abspath = abspath.encode(fs_encoding) target = os.readlink(abspath) if fs_encoding not in ('utf-8', 'ascii'): # Change encoding if needed target = target.decode(fs_encoding).encode('UTF-8') return target def get_ghosts(self): """Return a list of the parent tree revision ids that are ghosts.""" self._read_header_if_needed() return self._ghosts def get_lines(self): """Serialise the entire dirstate to a sequence of lines.""" if (self._header_state == DirState.IN_MEMORY_UNMODIFIED and self._dirblock_state == DirState.IN_MEMORY_UNMODIFIED): # read what's on disk. self._state_file.seek(0) return self._state_file.readlines() lines = [] lines.append(self._get_parents_line(self.get_parent_ids())) lines.append(self._get_ghosts_line(self._ghosts)) lines.extend(self._get_entry_lines()) return self._get_output_lines(lines) def _get_ghosts_line(self, ghost_ids): """Create a line for the state file for ghost information.""" return '\0'.join([str(len(ghost_ids))] + ghost_ids) def _get_parents_line(self, parent_ids): """Create a line for the state file for parents information.""" return '\0'.join([str(len(parent_ids))] + parent_ids) def _get_entry_lines(self): """Create lines for entries.""" return map(self._entry_to_line, self._iter_entries()) def _get_fields_to_entry(self): """Get a function which converts entry fields into a entry record. This handles size and executable, as well as parent records. 
:return: A function which takes a list of fields, and returns an appropriate record for storing in memory. """ # This is intentionally unrolled for performance num_present_parents = self._num_present_parents() if num_present_parents == 0: def fields_to_entry_0_parents(fields, _int=int): path_name_file_id_key = (fields[0], fields[1], fields[2]) return (path_name_file_id_key, [ ( # Current tree fields[3], # minikind fields[4], # fingerprint _int(fields[5]), # size fields[6] == 'y', # executable fields[7], # packed_stat or revision_id )]) return fields_to_entry_0_parents elif num_present_parents == 1: def fields_to_entry_1_parent(fields, _int=int): path_name_file_id_key = (fields[0], fields[1], fields[2]) return (path_name_file_id_key, [ ( # Current tree fields[3], # minikind fields[4], # fingerprint _int(fields[5]), # size fields[6] == 'y', # executable fields[7], # packed_stat or revision_id ), ( # Parent 1 fields[8], # minikind fields[9], # fingerprint _int(fields[10]), # size fields[11] == 'y', # executable fields[12], # packed_stat or revision_id ), ]) return fields_to_entry_1_parent elif num_present_parents == 2: def fields_to_entry_2_parents(fields, _int=int): path_name_file_id_key = (fields[0], fields[1], fields[2]) return (path_name_file_id_key, [ ( # Current tree fields[3], # minikind fields[4], # fingerprint _int(fields[5]), # size fields[6] == 'y', # executable fields[7], # packed_stat or revision_id ), ( # Parent 1 fields[8], # minikind fields[9], # fingerprint _int(fields[10]), # size fields[11] == 'y', # executable fields[12], # packed_stat or revision_id ), ( # Parent 2 fields[13], # minikind fields[14], # fingerprint _int(fields[15]), # size fields[16] == 'y', # executable fields[17], # packed_stat or revision_id ), ]) return fields_to_entry_2_parents else: def fields_to_entry_n_parents(fields, _int=int): path_name_file_id_key = (fields[0], fields[1], fields[2]) trees = [(fields[cur], # minikind fields[cur+1], # fingerprint _int(fields[cur+2]), # size fields[cur+3] == 'y', # executable fields[cur+4], # stat or revision_id ) for cur in xrange(3, len(fields)-1, 5)] return path_name_file_id_key, trees return fields_to_entry_n_parents def get_parent_ids(self): """Return a list of the parent tree ids for the directory state.""" self._read_header_if_needed() return list(self._parents) def _get_block_entry_index(self, dirname, basename, tree_index): """Get the coordinates for a path in the state structure. :param dirname: The utf8 dirname to lookup. :param basename: The utf8 basename to lookup. :param tree_index: The index of the tree for which this lookup should be attempted. :return: A tuple describing where the path is located, or should be inserted. The tuple contains four fields: the block index, the row index, the directory is present (boolean), the entire path is present (boolean). There is no guarantee that either coordinate is currently reachable unless the found field for it is True. For instance, a directory not present in the searched tree may be returned with a value one greater than the current highest block offset. The directory present field will always be True when the path present field is True. The directory present field does NOT indicate that the directory is present in the searched tree, rather it indicates that there are at least some files in some tree present there. 
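        Illustrative example (layout is hypothetical, not from the original
        source): looking up ('sub', 'file') when 'sub/file' is present in the
        searched tree yields something like (3, 0, True, True); looking up a
        basename that is absent from an existing directory yields
        (3, <insertion row>, True, False), and a directory that has no block
        at all yields (<insertion block>, 0, False, False).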
""" self._read_dirblocks_if_needed() key = dirname, basename, '' block_index, present = self._find_block_index_from_key(key) if not present: # no such directory - return the dir index and 0 for the row. return block_index, 0, False, False block = self._dirblocks[block_index][1] # access the entries only entry_index, present = self._find_entry_index(key, block) # linear search through entries at this path to find the one # requested. while entry_index < len(block) and block[entry_index][0][1] == basename: if block[entry_index][1][tree_index][0] not in 'ar': # neither absent or relocated return block_index, entry_index, True, True entry_index += 1 return block_index, entry_index, True, False def _get_entry(self, tree_index, fileid_utf8=None, path_utf8=None, include_deleted=False): """Get the dirstate entry for path in tree tree_index. If either file_id or path is supplied, it is used as the key to lookup. If both are supplied, the fastest lookup is used, and an error is raised if they do not both point at the same row. :param tree_index: The index of the tree we wish to locate this path in. If the path is present in that tree, the entry containing its details is returned, otherwise (None, None) is returned 0 is the working tree, higher indexes are successive parent trees. :param fileid_utf8: A utf8 file_id to look up. :param path_utf8: An utf8 path to be looked up. :param include_deleted: If True, and performing a lookup via fileid_utf8 rather than path_utf8, return an entry for deleted (absent) paths. :return: The dirstate entry tuple for path, or (None, None) """ self._read_dirblocks_if_needed() if path_utf8 is not None: if type(path_utf8) is not str: raise errors.BzrError('path_utf8 is not a str: %s %r' % (type(path_utf8), path_utf8)) # path lookups are faster dirname, basename = osutils.split(path_utf8) block_index, entry_index, dir_present, file_present = \ self._get_block_entry_index(dirname, basename, tree_index) if not file_present: return None, None entry = self._dirblocks[block_index][1][entry_index] if not (entry[0][2] and entry[1][tree_index][0] not in ('a', 'r')): raise AssertionError('unversioned entry?') if fileid_utf8: if entry[0][2] != fileid_utf8: self._changes_aborted = True raise errors.BzrError('integrity error ? : mismatching' ' tree_index, file_id and path') return entry else: possible_keys = self._get_id_index().get(fileid_utf8, ()) if not possible_keys: return None, None for key in possible_keys: block_index, present = \ self._find_block_index_from_key(key) # strange, probably indicates an out of date # id index - for now, allow this. if not present: continue # WARNING: DO not change this code to use _get_block_entry_index # as that function is not suitable: it does not use the key # to lookup, and thus the wrong coordinates are returned. block = self._dirblocks[block_index][1] entry_index, present = self._find_entry_index(key, block) if present: entry = self._dirblocks[block_index][1][entry_index] # TODO: We might want to assert that entry[0][2] == # fileid_utf8. if entry[1][tree_index][0] in 'fdlt': # this is the result we are looking for: the # real home of this file_id in this tree. 
return entry if entry[1][tree_index][0] == 'a': # there is no home for this entry in this tree if include_deleted: return entry return None, None if entry[1][tree_index][0] != 'r': raise AssertionError( "entry %r has invalid minikind %r for tree %r" \ % (entry, entry[1][tree_index][0], tree_index)) real_path = entry[1][tree_index][1] return self._get_entry(tree_index, fileid_utf8=fileid_utf8, path_utf8=real_path) return None, None @classmethod def initialize(cls, path, sha1_provider=None): """Create a new dirstate on path. The new dirstate will be an empty tree - that is it has no parents, and only a root node - which has id ROOT_ID. :param path: The name of the file for the dirstate. :param sha1_provider: an object meeting the SHA1Provider interface. If None, a DefaultSHA1Provider is used. :return: A write-locked DirState object. """ # This constructs a new DirState object on a path, sets the _state_file # to a new empty file for that path. It then calls _set_data() with our # stock empty dirstate information - a root with ROOT_ID, no children, # and no parents. Finally it calls save() to ensure that this data will # persist. if sha1_provider is None: sha1_provider = DefaultSHA1Provider() result = cls(path, sha1_provider) # root dir and root dir contents with no children. empty_tree_dirblocks = [('', []), ('', [])] # a new root directory, with a NULLSTAT. empty_tree_dirblocks[0][1].append( (('', '', inventory.ROOT_ID), [ ('d', '', 0, False, DirState.NULLSTAT), ])) result.lock_write() try: result._set_data([], empty_tree_dirblocks) result.save() except: result.unlock() raise return result @staticmethod def _inv_entry_to_details(inv_entry): """Convert an inventory entry (from a revision tree) to state details. :param inv_entry: An inventory entry whose sha1 and link targets can be relied upon, and which has a revision set. :return: A details tuple - the details for a single tree at a path + id. """ kind = inv_entry.kind minikind = DirState._kind_to_minikind[kind] tree_data = inv_entry.revision if kind == 'directory': fingerprint = '' size = 0 executable = False elif kind == 'symlink': if inv_entry.symlink_target is None: fingerprint = '' else: fingerprint = inv_entry.symlink_target.encode('utf8') size = 0 executable = False elif kind == 'file': fingerprint = inv_entry.text_sha1 or '' size = inv_entry.text_size or 0 executable = inv_entry.executable elif kind == 'tree-reference': fingerprint = inv_entry.reference_revision or '' size = 0 executable = False else: raise Exception("can't pack %s" % inv_entry) return static_tuple.StaticTuple(minikind, fingerprint, size, executable, tree_data) def _iter_child_entries(self, tree_index, path_utf8): """Iterate over all the entries that are children of path_utf. This only returns entries that are present (not in 'a', 'r') in tree_index. tree_index data is not refreshed, so if tree 0 is used, results may differ from that obtained if paths were statted to determine what ones were directories. Asking for the children of a non-directory will return an empty iterator. """ pending_dirs = [] next_pending_dirs = [path_utf8] absent = 'ar' while next_pending_dirs: pending_dirs = next_pending_dirs next_pending_dirs = [] for path in pending_dirs: block_index, present = self._find_block_index_from_key( (path, '', '')) if block_index == 0: block_index = 1 if len(self._dirblocks) == 1: # asked for the children of the root with no other # contents. return if not present: # children of a non-directory asked for. 
                    continue
                block = self._dirblocks[block_index]
                for entry in block[1]:
                    kind = entry[1][tree_index][0]
                    if kind not in absent:
                        yield entry
                    if kind == 'd':
                        if entry[0][0]:
                            path = entry[0][0] + '/' + entry[0][1]
                        else:
                            path = entry[0][1]
                        next_pending_dirs.append(path)

    def _iter_entries(self):
        """Iterate over all the entries in the dirstate.

        Each yielded item is an entry in the standard format described in the
        docstring of bzrlib.dirstate.
        """
        self._read_dirblocks_if_needed()
        for directory in self._dirblocks:
            for entry in directory[1]:
                yield entry

    def _get_id_index(self):
        """Get an id index of self._dirblocks.

        This maps from file_id => [(directory, name, file_id)] entries where
        that file_id appears in one of the trees.
        """
        if self._id_index is None:
            id_index = {}
            for key, tree_details in self._iter_entries():
                self._add_to_id_index(id_index, key)
            self._id_index = id_index
        return self._id_index

    def _add_to_id_index(self, id_index, entry_key):
        """Add this entry to the _id_index mapping."""
        # This code used to use a set for every entry in the id_index. However,
        # it is *rare* to have more than one entry. So a set is overkill. And
        # even when we do, we won't ever have more than the number of parent
        # trees. Which is still a small number (rarely >2). As such, we use a
        # simple tuple, and do our own uniqueness checks. While the 'in' check
        # is O(N), since N is nicely bounded it shouldn't ever cause quadratic
        # failure.
        file_id = entry_key[2]
        entry_key = static_tuple.StaticTuple.from_sequence(entry_key)
        if file_id not in id_index:
            id_index[file_id] = static_tuple.StaticTuple(entry_key,)
        else:
            entry_keys = id_index[file_id]
            if entry_key not in entry_keys:
                id_index[file_id] = entry_keys + (entry_key,)

    def _remove_from_id_index(self, id_index, entry_key):
        """Remove this entry from the _id_index mapping.

        It is a programming error to call this when the entry_key is not
        already present.
        """
        file_id = entry_key[2]
        entry_keys = list(id_index[file_id])
        entry_keys.remove(entry_key)
        id_index[file_id] = static_tuple.StaticTuple.from_sequence(entry_keys)

    def _get_output_lines(self, lines):
        """Format lines for final output.

        :param lines: A sequence of lines containing the parents list and the
            path lines.
        """
        output_lines = [DirState.HEADER_FORMAT_3]
        lines.append('')  # a final newline
        inventory_text = '\0\n\0'.join(lines)
        output_lines.append('crc32: %s\n' % (zlib.crc32(inventory_text),))
        # -3, 1 for num parents, 1 for ghosts, 1 for final newline
        num_entries = len(lines)-3
        output_lines.append('num_entries: %s\n' % (num_entries,))
        output_lines.append(inventory_text)
        return output_lines

    def _make_deleted_row(self, fileid_utf8, parents):
        """Return a deleted row for fileid_utf8."""
        return ('/', 'RECYCLED.BIN', 'file', fileid_utf8, 0, DirState.NULLSTAT,
            ''), parents

    def _num_present_parents(self):
        """The number of parent entries in each record row."""
        return len(self._parents) - len(self._ghosts)

    @classmethod
    def on_file(cls, path, sha1_provider=None, worth_saving_limit=0):
        """Construct a DirState on the file at path "path".

        :param path: The path at which the dirstate file on disk should live.
        :param sha1_provider: an object meeting the SHA1Provider interface.
            If None, a DefaultSHA1Provider is used.
        :param worth_saving_limit: when the exact number of hash changed
            entries is known, only bother saving the dirstate if more than
            this count of entries have changed. -1 means never save.
        :return: An unlocked DirState object, associated with the given path.
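        A minimal usage sketch (illustrative only; the filename is
        hypothetical)::

            state = DirState.on_file('dirstate')
            state.lock_read()
            try:
                parent_ids = state.get_parent_ids()
            finally:
                state.unlock()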
""" if sha1_provider is None: sha1_provider = DefaultSHA1Provider() result = cls(path, sha1_provider, worth_saving_limit=worth_saving_limit) return result def _read_dirblocks_if_needed(self): """Read in all the dirblocks from the file if they are not in memory. This populates self._dirblocks, and sets self._dirblock_state to IN_MEMORY_UNMODIFIED. It is not currently ready for incremental block loading. """ self._read_header_if_needed() if self._dirblock_state == DirState.NOT_IN_MEMORY: _read_dirblocks(self) def _read_header(self): """This reads in the metadata header, and the parent ids. After reading in, the file should be positioned at the null just before the start of the first record in the file. :return: (expected crc checksum, number of entries, parent list) """ self._read_prelude() parent_line = self._state_file.readline() info = parent_line.split('\0') num_parents = int(info[0]) self._parents = info[1:-1] ghost_line = self._state_file.readline() info = ghost_line.split('\0') num_ghosts = int(info[1]) self._ghosts = info[2:-1] self._header_state = DirState.IN_MEMORY_UNMODIFIED self._end_of_header = self._state_file.tell() def _read_header_if_needed(self): """Read the header of the dirstate file if needed.""" # inline this as it will be called a lot if not self._lock_token: raise errors.ObjectNotLocked(self) if self._header_state == DirState.NOT_IN_MEMORY: self._read_header() def _read_prelude(self): """Read in the prelude header of the dirstate file. This only reads in the stuff that is not connected to the crc checksum. The position will be correct to read in the rest of the file and check the checksum after this point. The next entry in the file should be the number of parents, and their ids. Followed by a newline. """ header = self._state_file.readline() if header != DirState.HEADER_FORMAT_3: raise errors.BzrError( 'invalid header line: %r' % (header,)) crc_line = self._state_file.readline() if not crc_line.startswith('crc32: '): raise errors.BzrError('missing crc32 checksum: %r' % crc_line) self.crc_expected = int(crc_line[len('crc32: '):-1]) num_entries_line = self._state_file.readline() if not num_entries_line.startswith('num_entries: '): raise errors.BzrError('missing num_entries line') self._num_entries = int(num_entries_line[len('num_entries: '):-1]) def sha1_from_stat(self, path, stat_result): """Find a sha1 given a stat lookup.""" return self._get_packed_stat_index().get(pack_stat(stat_result), None) def _get_packed_stat_index(self): """Get a packed_stat index of self._dirblocks.""" if self._packed_stat_index is None: index = {} for key, tree_details in self._iter_entries(): if tree_details[0][0] == 'f': index[tree_details[0][4]] = tree_details[0][1] self._packed_stat_index = index return self._packed_stat_index def save(self): """Save any pending changes created during this session. We reuse the existing file, because that prevents race conditions with file creation, and use oslocks on it to prevent concurrent modification and reads - because dirstate's incremental data aggregation is not compatible with reading a modified file, and replacing a file in use by another process is impossible on Windows. A dirstate in read only mode should be smart enough though to validate that the file has not changed, and otherwise discard its cache and start over, to allow for fine grained read lock duration, so 'status' wont block 'commit' - for example. """ if self._changes_aborted: # Should this be a warning? 
For now, I'm expecting that places that # mark it inconsistent will warn, making a warning here redundant. trace.mutter('Not saving DirState because ' '_changes_aborted is set.') return # TODO: Since we now distinguish IN_MEMORY_MODIFIED from # IN_MEMORY_HASH_MODIFIED, we should only fail quietly if we fail # to save an IN_MEMORY_HASH_MODIFIED, and fail *noisily* if we # fail to save IN_MEMORY_MODIFIED if not self._worth_saving(): return grabbed_write_lock = False if self._lock_state != 'w': grabbed_write_lock, new_lock = self._lock_token.temporary_write_lock() # Switch over to the new lock, as the old one may be closed. # TODO: jam 20070315 We should validate the disk file has # not changed contents, since temporary_write_lock may # not be an atomic operation. self._lock_token = new_lock self._state_file = new_lock.f if not grabbed_write_lock: # We couldn't grab a write lock, so we switch back to a read one return try: lines = self.get_lines() self._state_file.seek(0) self._state_file.writelines(lines) self._state_file.truncate() self._state_file.flush() self._maybe_fdatasync() self._mark_unmodified() finally: if grabbed_write_lock: self._lock_token = self._lock_token.restore_read_lock() self._state_file = self._lock_token.f # TODO: jam 20070315 We should validate the disk file has # not changed contents. Since restore_read_lock may # not be an atomic operation. def _maybe_fdatasync(self): """Flush to disk if possible and if not configured off.""" if self._config_stack.get('dirstate.fdatasync'): osutils.fdatasync(self._state_file.fileno()) def _worth_saving(self): """Is it worth saving the dirstate or not?""" if (self._header_state == DirState.IN_MEMORY_MODIFIED or self._dirblock_state == DirState.IN_MEMORY_MODIFIED): return True if self._dirblock_state == DirState.IN_MEMORY_HASH_MODIFIED: if self._worth_saving_limit == -1: # We never save hash changes when the limit is -1 return False # If we're using smart saving and only a small number of # entries have changed their hash, don't bother saving. John has # suggested using a heuristic here based on the size of the # changed files and/or tree. For now, we go with a configurable # number of changes, keeping the calculation time # as low overhead as possible. (This also keeps all existing # tests passing as the default is 0, i.e. always save.) if len(self._known_hash_changes) >= self._worth_saving_limit: return True return False def _set_data(self, parent_ids, dirblocks): """Set the full dirstate data in memory. This is an internal function used to completely replace the objects in memory state. It puts the dirstate into state 'full-dirty'. :param parent_ids: A list of parent tree revision ids. :param dirblocks: A list containing one tuple for each directory in the tree. Each tuple contains the directory path and a list of entries found in that directory. """ # our memory copy is now authoritative. self._dirblocks = dirblocks self._mark_modified(header_modified=True) self._parents = list(parent_ids) self._id_index = None self._packed_stat_index = None def set_path_id(self, path, new_id): """Change the id of path to new_id in the current working tree. :param path: The path inside the tree to set - '' is the root, 'foo' is the path foo in the root. :param new_id: The new id to assign to the path. This must be a utf8 file id (not unicode, and not None). 
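        Illustrative example (the id is hypothetical, not from the original
        source): set_path_id('', 'new-root-id') re-keys the root entry to
        'new-root-id'; any other path currently raises NotImplementedError.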
""" self._read_dirblocks_if_needed() if len(path): # TODO: logic not written raise NotImplementedError(self.set_path_id) # TODO: check new id is unique entry = self._get_entry(0, path_utf8=path) if entry[0][2] == new_id: # Nothing to change. return # mark the old path absent, and insert a new root path self._make_absent(entry) self.update_minimal(('', '', new_id), 'd', path_utf8='', packed_stat=entry[1][0][4]) self._mark_modified() def set_parent_trees(self, trees, ghosts): """Set the parent trees for the dirstate. :param trees: A list of revision_id, tree tuples. tree must be provided even if the revision_id refers to a ghost: supply an empty tree in this case. :param ghosts: A list of the revision_ids that are ghosts at the time of setting. """ # TODO: generate a list of parent indexes to preserve to save # processing specific parent trees. In the common case one tree will # be preserved - the left most parent. # TODO: if the parent tree is a dirstate, we might want to walk them # all by path in parallel for 'optimal' common-case performance. # generate new root row. self._read_dirblocks_if_needed() # TODO future sketch: Examine the existing parents to generate a change # map and then walk the new parent trees only, mapping them into the # dirstate. Walk the dirstate at the same time to remove unreferenced # entries. # for now: # sketch: loop over all entries in the dirstate, cherry picking # entries from the parent trees, if they are not ghost trees. # after we finish walking the dirstate, all entries not in the dirstate # are deletes, so we want to append them to the end as per the design # discussions. So do a set difference on ids with the parents to # get deletes, and add them to the end. # During the update process we need to answer the following questions: # - find other keys containing a fileid in order to create cross-path # links. We dont't trivially use the inventory from other trees # because this leads to either double touching, or to accessing # missing keys, # - find other keys containing a path # We accumulate each entry via this dictionary, including the root by_path = {} id_index = {} # we could do parallel iterators, but because file id data may be # scattered throughout, we dont save on index overhead: we have to look # at everything anyway. We can probably save cycles by reusing parent # data and doing an incremental update when adding an additional # parent, but for now the common cases are adding a new parent (merge), # and replacing completely (commit), and commit is more common: so # optimise merge later. # ---- start generation of full tree mapping data # what trees should we use? parent_trees = [tree for rev_id, tree in trees if rev_id not in ghosts] # how many trees do we end up with parent_count = len(parent_trees) st = static_tuple.StaticTuple # one: the current tree for entry in self._iter_entries(): # skip entries not in the current tree if entry[1][0][0] in 'ar': # absent, relocated continue by_path[entry[0]] = [entry[1][0]] + \ [DirState.NULL_PARENT_DETAILS] * parent_count # TODO: Possibly inline this, since we know it isn't present yet # id_index[entry[0][2]] = (entry[0],) self._add_to_id_index(id_index, entry[0]) # now the parent trees: for tree_index, tree in enumerate(parent_trees): # the index is off by one, adjust it. 
tree_index = tree_index + 1 # when we add new locations for a fileid we need these ranges for # any fileid in this tree as we set the by_path[id] to: # already_processed_tree_details + new_details + new_location_suffix # the suffix is from tree_index+1:parent_count+1. new_location_suffix = [DirState.NULL_PARENT_DETAILS] * (parent_count - tree_index) # now stitch in all the entries from this tree last_dirname = None for path, entry in tree.iter_entries_by_dir(): # here we process each trees details for each item in the tree. # we first update any existing entries for the id at other paths, # then we either create or update the entry for the id at the # right path, and finally we add (if needed) a mapping from # file_id to this path. We do it in this order to allow us to # avoid checking all known paths for the id when generating a # new entry at this path: by adding the id->path mapping last, # all the mappings are valid and have correct relocation # records where needed. file_id = entry.file_id path_utf8 = path.encode('utf8') dirname, basename = osutils.split(path_utf8) if dirname == last_dirname: # Try to re-use objects as much as possible dirname = last_dirname else: last_dirname = dirname new_entry_key = st(dirname, basename, file_id) # tree index consistency: All other paths for this id in this tree # index must point to the correct path. entry_keys = id_index.get(file_id, ()) for entry_key in entry_keys: # TODO:PROFILING: It might be faster to just update # rather than checking if we need to, and then overwrite # the one we are located at. if entry_key != new_entry_key: # this file id is at a different path in one of the # other trees, so put absent pointers there # This is the vertical axis in the matrix, all pointing # to the real path. by_path[entry_key][tree_index] = st('r', path_utf8, 0, False, '') # by path consistency: Insert into an existing path record # (trivial), or add a new one with relocation pointers for the # other tree indexes. if new_entry_key in entry_keys: # there is already an entry where this data belongs, just # insert it. by_path[new_entry_key][tree_index] = \ self._inv_entry_to_details(entry) else: # add relocated entries to the horizontal axis - this row # mapping from path,id. We need to look up the correct path # for the indexes from 0 to tree_index -1 new_details = [] for lookup_index in xrange(tree_index): # boundary case: this is the first occurence of file_id # so there are no id_indexes, possibly take this out of # the loop? if not len(entry_keys): new_details.append(DirState.NULL_PARENT_DETAILS) else: # grab any one entry, use it to find the right path. a_key = iter(entry_keys).next() if by_path[a_key][lookup_index][0] in ('r', 'a'): # its a pointer or missing statement, use it as # is. new_details.append(by_path[a_key][lookup_index]) else: # we have the right key, make a pointer to it. 
real_path = ('/'.join(a_key[0:2])).strip('/') new_details.append(st('r', real_path, 0, False, '')) new_details.append(self._inv_entry_to_details(entry)) new_details.extend(new_location_suffix) by_path[new_entry_key] = new_details self._add_to_id_index(id_index, new_entry_key) # --- end generation of full tree mappings # sort and output all the entries new_entries = self._sort_entries(by_path.items()) self._entries_to_current_state(new_entries) self._parents = [rev_id for rev_id, tree in trees] self._ghosts = list(ghosts) self._mark_modified(header_modified=True) self._id_index = id_index def _sort_entries(self, entry_list): """Given a list of entries, sort them into the right order. This is done when constructing a new dirstate from trees - normally we try to keep everything in sorted blocks all the time, but sometimes it's easier to sort after the fact. """ # When sorting, we usually have 10x more entries than directories. (69k # total entries, 4k directories). So cache the results of splitting. # Saving time and objects. Also, use StaticTuple to avoid putting all # of these object into python's garbage collector. split_dirs = {} def _key(entry, _split_dirs=split_dirs, _st=static_tuple.StaticTuple): # sort by: directory parts, file name, file id dirpath, fname, file_id = entry[0] try: split = _split_dirs[dirpath] except KeyError: split = _st.from_sequence(dirpath.split('/')) _split_dirs[dirpath] = split return _st(split, fname, file_id) return sorted(entry_list, key=_key) def set_state_from_inventory(self, new_inv): """Set new_inv as the current state. This API is called by tree transform, and will usually occur with existing parent trees. :param new_inv: The inventory object to set current state from. """ if 'evil' in debug.debug_flags: trace.mutter_callsite(1, "set_state_from_inventory called; please mutate the tree instead") tracing = 'dirstate' in debug.debug_flags if tracing: trace.mutter("set_state_from_inventory trace:") self._read_dirblocks_if_needed() # sketch: # Two iterators: current data and new data, both in dirblock order. # We zip them together, which tells about entries that are new in the # inventory, or removed in the inventory, or present in both and # possibly changed. # # You might think we could just synthesize a new dirstate directly # since we're processing it in the right order. However, we need to # also consider there may be any number of parent trees and relocation # pointers, and we don't want to duplicate that here. new_iterator = new_inv.iter_entries_by_dir() # we will be modifying the dirstate, so we need a stable iterator. In # future we might write one, for now we just clone the state into a # list using a copy so that we see every original item and don't have # to adjust the position when items are inserted or deleted in the # underlying dirstate. 
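# A minimal illustrative sketch (toy data) of why the dirstate is snapshotted
# into a list before the merge loop below: mutating a structure while
# iterating over it directly would skip or repeat entries, whereas iterating
# a copy keeps the walk stable.
#
#   rows = ['a', 'b', 'c']
#   for row in list(rows):     # iterate over a copy ...
#       if row == 'b':
#           rows.remove(row)   # ... so mutating the original is safe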
old_iterator = iter(list(self._iter_entries())) # both must have roots so this is safe: current_new = new_iterator.next() current_old = old_iterator.next() def advance(iterator): try: return iterator.next() except StopIteration: return None while current_new or current_old: # skip entries in old that are not really there if current_old and current_old[1][0][0] in 'ar': # relocated or absent current_old = advance(old_iterator) continue if current_new: # convert new into dirblock style new_path_utf8 = current_new[0].encode('utf8') new_dirname, new_basename = osutils.split(new_path_utf8) new_id = current_new[1].file_id new_entry_key = (new_dirname, new_basename, new_id) current_new_minikind = \ DirState._kind_to_minikind[current_new[1].kind] if current_new_minikind == 't': fingerprint = current_new[1].reference_revision or '' else: # We normally only insert or remove records, or update # them when it has significantly changed. Then we want to # erase its fingerprint. Unaffected records should # normally not be updated at all. fingerprint = '' else: # for safety disable variables new_path_utf8 = new_dirname = new_basename = new_id = \ new_entry_key = None # 5 cases, we dont have a value that is strictly greater than everything, so # we make both end conditions explicit if not current_old: # old is finished: insert current_new into the state. if tracing: trace.mutter("Appending from new '%s'.", new_path_utf8.decode('utf8')) self.update_minimal(new_entry_key, current_new_minikind, executable=current_new[1].executable, path_utf8=new_path_utf8, fingerprint=fingerprint, fullscan=True) current_new = advance(new_iterator) elif not current_new: # new is finished if tracing: trace.mutter("Truncating from old '%s/%s'.", current_old[0][0].decode('utf8'), current_old[0][1].decode('utf8')) self._make_absent(current_old) current_old = advance(old_iterator) elif new_entry_key == current_old[0]: # same - common case # We're looking at the same path and id in both the dirstate # and inventory, so just need to update the fields in the # dirstate from the one in the inventory. # TODO: update the record if anything significant has changed. # the minimal required trigger is if the execute bit or cached # kind has changed. if (current_old[1][0][3] != current_new[1].executable or current_old[1][0][0] != current_new_minikind): if tracing: trace.mutter("Updating in-place change '%s'.", new_path_utf8.decode('utf8')) self.update_minimal(current_old[0], current_new_minikind, executable=current_new[1].executable, path_utf8=new_path_utf8, fingerprint=fingerprint, fullscan=True) # both sides are dealt with, move on current_old = advance(old_iterator) current_new = advance(new_iterator) elif (cmp_by_dirs(new_dirname, current_old[0][0]) < 0 or (new_dirname == current_old[0][0] and new_entry_key[1:] < current_old[0][1:])): # new comes before: # add a entry for this and advance new if tracing: trace.mutter("Inserting from new '%s'.", new_path_utf8.decode('utf8')) self.update_minimal(new_entry_key, current_new_minikind, executable=current_new[1].executable, path_utf8=new_path_utf8, fingerprint=fingerprint, fullscan=True) current_new = advance(new_iterator) else: # we've advanced past the place where the old key would be, # without seeing it in the new list. so it must be gone. 
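# A minimal illustrative sketch (standalone, invented names) of the control
# flow above: a merge of two sorted streams with explicit end conditions,
# stripped of the dirstate details.  'advance' mirrors the helper defined
# above; the Python 2 iterator style matches the surrounding code.
#
#   def _demo_merge(old_items, new_items):
#       old_it, new_it = iter(old_items), iter(new_items)
#       def advance(it):
#           try:
#               return it.next()
#           except StopIteration:
#               return None
#       old, new = advance(old_it), advance(new_it)
#       while old is not None or new is not None:
#           if old is None:                      # old exhausted: insert new
#               new = advance(new_it)
#           elif new is None:                    # new exhausted: remove old
#               old = advance(old_it)
#           elif old == new:                     # same key: update in place
#               old, new = advance(old_it), advance(new_it)
#           elif new < old:                      # new comes first: insert it
#               new = advance(new_it)
#           else:                                # old key has gone: remove it
#               old = advance(old_it)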
if tracing: trace.mutter("Deleting from old '%s/%s'.", current_old[0][0].decode('utf8'), current_old[0][1].decode('utf8')) self._make_absent(current_old) current_old = advance(old_iterator) self._mark_modified() self._id_index = None self._packed_stat_index = None if tracing: trace.mutter("set_state_from_inventory complete.") def set_state_from_scratch(self, working_inv, parent_trees, parent_ghosts): """Wipe the currently stored state and set it to something new. This is a hard-reset for the data we are working with. """ # Technically, we really want a write lock, but until we write, we # don't really need it. self._requires_lock() # root dir and root dir contents with no children. We have to have a # root for set_state_from_inventory to work correctly. empty_root = (('', '', inventory.ROOT_ID), [('d', '', 0, False, DirState.NULLSTAT)]) empty_tree_dirblocks = [('', [empty_root]), ('', [])] self._set_data([], empty_tree_dirblocks) self.set_state_from_inventory(working_inv) self.set_parent_trees(parent_trees, parent_ghosts) def _make_absent(self, current_old): """Mark current_old - an entry - as absent for tree 0. :return: True if this was the last details entry for the entry key: that is, if the underlying block has had the entry removed, thus shrinking in length. """ # build up paths that this id will be left at after the change is made, # so we can update their cross references in tree 0 all_remaining_keys = set() # Dont check the working tree, because it's going. for details in current_old[1][1:]: if details[0] not in 'ar': # absent, relocated all_remaining_keys.add(current_old[0]) elif details[0] == 'r': # relocated # record the key for the real path. all_remaining_keys.add(tuple(osutils.split(details[1])) + (current_old[0][2],)) # absent rows are not present at any path. last_reference = current_old[0] not in all_remaining_keys if last_reference: # the current row consists entire of the current item (being marked # absent), and relocated or absent entries for the other trees: # Remove it, its meaningless. block = self._find_block(current_old[0]) entry_index, present = self._find_entry_index(current_old[0], block[1]) if not present: raise AssertionError('could not find entry for %s' % (current_old,)) block[1].pop(entry_index) # if we have an id_index in use, remove this key from it for this id. if self._id_index is not None: self._remove_from_id_index(self._id_index, current_old[0]) # update all remaining keys for this id to record it as absent. The # existing details may either be the record we are marking as deleted # (if there were other trees with the id present at this path), or may # be relocations. for update_key in all_remaining_keys: update_block_index, present = \ self._find_block_index_from_key(update_key) if not present: raise AssertionError('could not find block for %s' % (update_key,)) update_entry_index, present = \ self._find_entry_index(update_key, self._dirblocks[update_block_index][1]) if not present: raise AssertionError('could not find entry for %s' % (update_key,)) update_tree_details = self._dirblocks[update_block_index][1][update_entry_index][1] # it must not be absent at the moment if update_tree_details[0][0] == 'a': # absent raise AssertionError('bad row %r' % (update_tree_details,)) update_tree_details[0] = DirState.NULL_PARENT_DETAILS self._mark_modified() return last_reference def update_minimal(self, key, minikind, executable=False, fingerprint='', packed_stat=None, size=0, path_utf8=None, fullscan=False): """Update an entry to the state in tree 0. 
This will either create a new entry at 'key' or update an existing one. It also makes sure that any other records which might mention this are updated as well. :param key: (dir, name, file_id) for the new entry :param minikind: The type for the entry ('f' == 'file', 'd' == 'directory'), etc. :param executable: Should the executable bit be set? :param fingerprint: Simple fingerprint for new entry: canonical-form sha1 for files, referenced revision id for subtrees, etc. :param packed_stat: Packed stat value for new entry. :param size: Size information for new entry :param path_utf8: key[0] + '/' + key[1], just passed in to avoid doing extra computation. :param fullscan: If True then a complete scan of the dirstate is being done and checking for duplicate rows should not be done. This should only be set by set_state_from_inventory and similar methods. If packed_stat and fingerprint are not given, they're invalidated in the entry. """ block = self._find_block(key)[1] if packed_stat is None: packed_stat = DirState.NULLSTAT # XXX: Some callers pass '' as the packed_stat, and it seems to be # sometimes present in the dirstate - this seems oddly inconsistent. # mbp 20071008 entry_index, present = self._find_entry_index(key, block) new_details = (minikind, fingerprint, size, executable, packed_stat) id_index = self._get_id_index() if not present: # New record. Check there isn't a entry at this path already. if not fullscan: low_index, _ = self._find_entry_index(key[0:2] + ('',), block) while low_index < len(block): entry = block[low_index] if entry[0][0:2] == key[0:2]: if entry[1][0][0] not in 'ar': # This entry has the same path (but a different id) as # the new entry we're adding, and is present in ths # tree. self._raise_invalid( ("%s/%s" % key[0:2]).decode('utf8'), key[2], "Attempt to add item at path already occupied by " "id %r" % entry[0][2]) low_index += 1 else: break # new entry, synthesis cross reference here, existing_keys = id_index.get(key[2], ()) if not existing_keys: # not currently in the state, simplest case new_entry = key, [new_details] + self._empty_parent_info() else: # present at one or more existing other paths. # grab one of them and use it to generate parent # relocation/absent entries. new_entry = key, [new_details] # existing_keys can be changed as we iterate. for other_key in tuple(existing_keys): # change the record at other to be a pointer to this new # record. The loop looks similar to the change to # relocations when updating an existing record but its not: # the test for existing kinds is different: this can be # factored out to a helper though. other_block_index, present = self._find_block_index_from_key( other_key) if not present: raise AssertionError('could not find block for %s' % ( other_key,)) other_block = self._dirblocks[other_block_index][1] other_entry_index, present = self._find_entry_index( other_key, other_block) if not present: raise AssertionError( 'update_minimal: could not find other entry for %s' % (other_key,)) if path_utf8 is None: raise AssertionError('no path') # Turn this other location into a reference to the new # location. This also updates the aliased iterator # (current_old in set_state_from_inventory) so that the old # entry, if not already examined, is skipped over by that # loop. 
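# A minimal illustrative sketch of the id_index consulted here (keys and ids
# invented; plain tuples stand in for the real accessor helpers).  It maps a
# file id to every (dir, name, file_id) key that mentions it, which is what
# lets update_minimal rewrite the *other* locations as relocation pointers
# to the new real path.
#
#   id_index = {
#       'file-id-1': (('', 'old-name', 'file-id-1'),
#                     ('subdir', 'new-name', 'file-id-1')),
#   }
#   # each other location's tree-0 column then becomes:
#   #   ('r', 'subdir/new-name', 0, False, '')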
other_entry = other_block[other_entry_index] other_entry[1][0] = ('r', path_utf8, 0, False, '') if self._maybe_remove_row(other_block, other_entry_index, id_index): # If the row holding this was removed, we need to # recompute where this entry goes entry_index, _ = self._find_entry_index(key, block) # This loop: # adds a tuple to the new details for each column # - either by copying an existing relocation pointer inside that column # - or by creating a new pointer to the right row inside that column num_present_parents = self._num_present_parents() if num_present_parents: # TODO: This re-evaluates the existing_keys set, do we need # to do that ourselves? other_key = list(existing_keys)[0] for lookup_index in xrange(1, num_present_parents + 1): # grab any one entry, use it to find the right path. # TODO: optimise this to reduce memory use in highly # fragmented situations by reusing the relocation # records. update_block_index, present = \ self._find_block_index_from_key(other_key) if not present: raise AssertionError('could not find block for %s' % (other_key,)) update_entry_index, present = \ self._find_entry_index(other_key, self._dirblocks[update_block_index][1]) if not present: raise AssertionError('update_minimal: could not find entry for %s' % (other_key,)) update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index] if update_details[0] in 'ar': # relocated, absent # its a pointer or absent in lookup_index's tree, use # it as is. new_entry[1].append(update_details) else: # we have the right key, make a pointer to it. pointer_path = osutils.pathjoin(*other_key[0:2]) new_entry[1].append(('r', pointer_path, 0, False, '')) block.insert(entry_index, new_entry) self._add_to_id_index(id_index, key) else: # Does the new state matter? block[entry_index][1][0] = new_details # parents cannot be affected by what we do. # other occurences of this id can be found # from the id index. # --- # tree index consistency: All other paths for this id in this tree # index must point to the correct path. We have to loop here because # we may have passed entries in the state with this file id already # that were absent - where parent entries are - and they need to be # converted to relocated. if path_utf8 is None: raise AssertionError('no path') existing_keys = id_index.get(key[2], ()) if key not in existing_keys: raise AssertionError('We found the entry in the blocks, but' ' the key is not in the id_index.' ' key: %s, existing_keys: %s' % (key, existing_keys)) for entry_key in existing_keys: # TODO:PROFILING: It might be faster to just update # rather than checking if we need to, and then overwrite # the one we are located at. if entry_key != key: # this file id is at a different path in one of the # other trees, so put absent pointers there # This is the vertical axis in the matrix, all pointing # to the real path. block_index, present = self._find_block_index_from_key(entry_key) if not present: raise AssertionError('not present: %r', entry_key) entry_index, present = self._find_entry_index(entry_key, self._dirblocks[block_index][1]) if not present: raise AssertionError('not present: %r', entry_key) self._dirblocks[block_index][1][entry_index][1][0] = \ ('r', path_utf8, 0, False, '') # add a containing dirblock if needed. 
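# A minimal illustrative sketch of the _dirblocks layout manipulated just
# below (entry lists elided, names invented).  The dirstate keeps a sorted
# list of (directory-path, entries) blocks, so a newly added directory
# simply gets an empty block bisected into place.
#
#   _dirblocks = [
#       ('', [root_entry]),            # the root row itself
#       ('', [entries_in_root_dir]),   # contents of the root directory
#       ('subdir', [entries_in_subdir]),
#       # adding 'subdir/child' as a directory inserts ('subdir/child', [])
#       # at the index returned by _find_block_index_from_key.
#   ]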
if new_details[0] == 'd': subdir_key = (osutils.pathjoin(*key[0:2]), '', '') block_index, present = self._find_block_index_from_key(subdir_key) if not present: self._dirblocks.insert(block_index, (subdir_key[0], [])) self._mark_modified() def _maybe_remove_row(self, block, index, id_index): """Remove index if it is absent or relocated across the row. id_index is updated accordingly. :return: True if we removed the row, False otherwise """ present_in_row = False entry = block[index] for column in entry[1]: if column[0] not in 'ar': present_in_row = True break if not present_in_row: block.pop(index) self._remove_from_id_index(id_index, entry[0]) return True return False def _validate(self): """Check that invariants on the dirblock are correct. This can be useful in debugging; it shouldn't be necessary in normal code. This must be called with a lock held. """ # NOTE: This must always raise AssertionError not just assert, # otherwise it may not behave properly under python -O # # TODO: All entries must have some content that's not 'a' or 'r', # otherwise it could just be removed. # # TODO: All relocations must point directly to a real entry. # # TODO: No repeated keys. # # -- mbp 20070325 from pprint import pformat self._read_dirblocks_if_needed() if len(self._dirblocks) > 0: if not self._dirblocks[0][0] == '': raise AssertionError( "dirblocks don't start with root block:\n" + \ pformat(self._dirblocks)) if len(self._dirblocks) > 1: if not self._dirblocks[1][0] == '': raise AssertionError( "dirblocks missing root directory:\n" + \ pformat(self._dirblocks)) # the dirblocks are sorted by their path components, name, and dir id dir_names = [d[0].split('/') for d in self._dirblocks[1:]] if dir_names != sorted(dir_names): raise AssertionError( "dir names are not in sorted order:\n" + \ pformat(self._dirblocks) + \ "\nkeys:\n" + pformat(dir_names)) for dirblock in self._dirblocks: # within each dirblock, the entries are sorted by filename and # then by id. for entry in dirblock[1]: if dirblock[0] != entry[0][0]: raise AssertionError( "entry key for %r" "doesn't match directory name in\n%r" % (entry, pformat(dirblock))) if dirblock[1] != sorted(dirblock[1]): raise AssertionError( "dirblock for %r is not sorted:\n%s" % \ (dirblock[0], pformat(dirblock))) def check_valid_parent(): """Check that the current entry has a valid parent. This makes sure that the parent has a record, and that the parent isn't marked as "absent" in the current tree. (It is invalid to have a non-absent file in an absent directory.) """ if entry[0][0:2] == ('', ''): # There should be no parent for the root row return parent_entry = self._get_entry(tree_index, path_utf8=entry[0][0]) if parent_entry == (None, None): raise AssertionError( "no parent entry for: %s in tree %s" % (this_path, tree_index)) if parent_entry[1][tree_index][0] != 'd': raise AssertionError( "Parent entry for %s is not marked as a valid" " directory. %s" % (this_path, parent_entry,)) # For each file id, for each tree: either # the file id is not present at all; all rows with that id in the # key have it marked as 'absent' # OR the file id is present under exactly one name; any other entries # that mention that id point to the correct name. # # We check this with a dict per tree pointing either to the present # name, or None if absent. tree_count = self._num_present_parents() + 1 id_path_maps = [dict() for i in range(tree_count)] # Make sure that all renamed entries point to the correct location. 
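# A minimal illustrative sketch (invented values) of the per-tree maps built
# by the loop below.  For each tree, each file id maps to a pair: the path
# the id should really be at (or None if absent), and the path of the row
# where it was first seen, so later rows mentioning the same id can be
# checked for consistency.
#
#   id_path_maps = [
#       {'file-id-1': ('a/current', 'a/current')},  # tree 0: a real row
#       {'file-id-1': ('a/old', 'a/current')},      # parent: relocated row
#   ]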
for entry in self._iter_entries(): file_id = entry[0][2] this_path = osutils.pathjoin(entry[0][0], entry[0][1]) if len(entry[1]) != tree_count: raise AssertionError( "wrong number of entry details for row\n%s" \ ",\nexpected %d" % \ (pformat(entry), tree_count)) absent_positions = 0 for tree_index, tree_state in enumerate(entry[1]): this_tree_map = id_path_maps[tree_index] minikind = tree_state[0] if minikind in 'ar': absent_positions += 1 # have we seen this id before in this column? if file_id in this_tree_map: previous_path, previous_loc = this_tree_map[file_id] # any later mention of this file must be consistent with # what was said before if minikind == 'a': if previous_path is not None: raise AssertionError( "file %s is absent in row %r but also present " \ "at %r"% \ (file_id, entry, previous_path)) elif minikind == 'r': target_location = tree_state[1] if previous_path != target_location: raise AssertionError( "file %s relocation in row %r but also at %r" \ % (file_id, entry, previous_path)) else: # a file, directory, etc - may have been previously # pointed to by a relocation, which must point here if previous_path != this_path: raise AssertionError( "entry %r inconsistent with previous path %r " "seen at %r" % (entry, previous_path, previous_loc)) check_valid_parent() else: if minikind == 'a': # absent; should not occur anywhere else this_tree_map[file_id] = None, this_path elif minikind == 'r': # relocation, must occur at expected location this_tree_map[file_id] = tree_state[1], this_path else: this_tree_map[file_id] = this_path, this_path check_valid_parent() if absent_positions == tree_count: raise AssertionError( "entry %r has no data for any tree." % (entry,)) if self._id_index is not None: for file_id, entry_keys in self._id_index.iteritems(): for entry_key in entry_keys: # Check that the entry in the map is pointing to the same # file_id if entry_key[2] != file_id: raise AssertionError( 'file_id %r did not match entry key %s' % (file_id, entry_key)) # And that from this entry key, we can look up the original # record block_index, present = self._find_block_index_from_key(entry_key) if not present: raise AssertionError('missing block for entry key: %r', entry_key) entry_index, present = self._find_entry_index(entry_key, self._dirblocks[block_index][1]) if not present: raise AssertionError('missing entry for key: %r', entry_key) if len(entry_keys) != len(set(entry_keys)): raise AssertionError( 'id_index contained non-unique data for %s' % (entry_keys,)) def _wipe_state(self): """Forget all state information about the dirstate.""" self._header_state = DirState.NOT_IN_MEMORY self._dirblock_state = DirState.NOT_IN_MEMORY self._changes_aborted = False self._parents = [] self._ghosts = [] self._dirblocks = [] self._id_index = None self._packed_stat_index = None self._end_of_header = None self._cutoff_time = None self._split_path_cache = {} def lock_read(self): """Acquire a read lock on the dirstate.""" if self._lock_token is not None: raise errors.LockContention(self._lock_token) # TODO: jam 20070301 Rather than wiping completely, if the blocks are # already in memory, we could read just the header and check for # any modification. 
If not modified, we can just leave things # alone self._lock_token = lock.ReadLock(self._filename) self._lock_state = 'r' self._state_file = self._lock_token.f self._wipe_state() def lock_write(self): """Acquire a write lock on the dirstate.""" if self._lock_token is not None: raise errors.LockContention(self._lock_token) # TODO: jam 20070301 Rather than wiping completely, if the blocks are # already in memory, we could read just the header and check for # any modification. If not modified, we can just leave things # alone self._lock_token = lock.WriteLock(self._filename) self._lock_state = 'w' self._state_file = self._lock_token.f self._wipe_state() def unlock(self): """Drop any locks held on the dirstate.""" if self._lock_token is None: raise errors.LockNotHeld(self) # TODO: jam 20070301 Rather than wiping completely, if the blocks are # already in memory, we could read just the header and check for # any modification. If not modified, we can just leave things # alone self._state_file = None self._lock_state = None self._lock_token.unlock() self._lock_token = None self._split_path_cache = {} def _requires_lock(self): """Check that a lock is currently held by someone on the dirstate.""" if not self._lock_token: raise errors.ObjectNotLocked(self) def py_update_entry(state, entry, abspath, stat_value, _stat_to_minikind=DirState._stat_to_minikind): """Update the entry based on what is actually on disk. This function only calculates the sha if it needs to - if the entry is uncachable, or clearly different to the first parent's entry, no sha is calculated, and None is returned. :param state: The dirstate this entry is in. :param entry: This is the dirblock entry for the file in question. :param abspath: The path on disk for this file. :param stat_value: The stat value done on the path. :return: None, or The sha1 hexdigest of the file (40 bytes) or link target of a symlink. """ try: minikind = _stat_to_minikind[stat_value.st_mode & 0170000] except KeyError: # Unhandled kind return None packed_stat = pack_stat(stat_value) (saved_minikind, saved_link_or_sha1, saved_file_size, saved_executable, saved_packed_stat) = entry[1][0] if minikind == 'd' and saved_minikind == 't': minikind = 't' if (minikind == saved_minikind and packed_stat == saved_packed_stat): # The stat hasn't changed since we saved, so we can re-use the # saved sha hash. if minikind == 'd': return None # size should also be in packed_stat if saved_file_size == stat_value.st_size: return saved_link_or_sha1 # If we have gotten this far, that means that we need to actually # process this entry. link_or_sha1 = None worth_saving = True if minikind == 'f': executable = state._is_executable(stat_value.st_mode, saved_executable) if state._cutoff_time is None: state._sha_cutoff_time() if (stat_value.st_mtime < state._cutoff_time and stat_value.st_ctime < state._cutoff_time and len(entry[1]) > 1 and entry[1][1][0] != 'a'): # Could check for size changes for further optimised # avoidance of sha1's. However the most prominent case of # over-shaing is during initial add, which this catches. # Besides, if content filtering happens, size and sha # are calculated at the same time, so checking just the size # gains nothing w.r.t. performance. 
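# A minimal illustrative sketch (invented helper, approximate policy) of the
# cutoff-time guard used here: a file whose mtime or ctime is too close to
# "now" may still be changing within the filesystem's timestamp granularity,
# so its hash is only cached once both timestamps are safely older than the
# cutoff.
#
#   import time
#   def _demo_safe_to_cache(st, cutoff=None):
#       if cutoff is None:
#           cutoff = int(time.time()) - 3   # a few seconds of slack, for example
#       return st.st_mtime < cutoff and st.st_ctime < cutoff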
link_or_sha1 = state._sha1_file(abspath) entry[1][0] = ('f', link_or_sha1, stat_value.st_size, executable, packed_stat) else: entry[1][0] = ('f', '', stat_value.st_size, executable, DirState.NULLSTAT) worth_saving = False elif minikind == 'd': link_or_sha1 = None entry[1][0] = ('d', '', 0, False, packed_stat) if saved_minikind != 'd': # This changed from something into a directory. Make sure we # have a directory block for it. This doesn't happen very # often, so this doesn't have to be super fast. block_index, entry_index, dir_present, file_present = \ state._get_block_entry_index(entry[0][0], entry[0][1], 0) state._ensure_block(block_index, entry_index, osutils.pathjoin(entry[0][0], entry[0][1])) else: worth_saving = False elif minikind == 'l': if saved_minikind == 'l': worth_saving = False link_or_sha1 = state._read_link(abspath, saved_link_or_sha1) if state._cutoff_time is None: state._sha_cutoff_time() if (stat_value.st_mtime < state._cutoff_time and stat_value.st_ctime < state._cutoff_time): entry[1][0] = ('l', link_or_sha1, stat_value.st_size, False, packed_stat) else: entry[1][0] = ('l', '', stat_value.st_size, False, DirState.NULLSTAT) if worth_saving: state._mark_modified([entry]) return link_or_sha1 class ProcessEntryPython(object): __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "last_source_parent", "last_target_parent", "include_unchanged", "partial", "use_filesystem_for_exec", "utf8_decode", "searched_specific_files", "search_specific_files", "searched_exact_paths", "search_specific_file_parents", "seen_ids", "state", "source_index", "target_index", "want_unversioned", "tree"] def __init__(self, include_unchanged, use_filesystem_for_exec, search_specific_files, state, source_index, target_index, want_unversioned, tree): self.old_dirname_to_file_id = {} self.new_dirname_to_file_id = {} # Are we doing a partial iter_changes? self.partial = search_specific_files != set(['']) # Using a list so that we can access the values and change them in # nested scope. Each one is [path, file_id, entry] self.last_source_parent = [None, None] self.last_target_parent = [None, None] self.include_unchanged = include_unchanged self.use_filesystem_for_exec = use_filesystem_for_exec self.utf8_decode = cache_utf8._utf8_decode # for all search_indexs in each path at or under each element of # search_specific_files, if the detail is relocated: add the id, and # add the relocated path as one to search if its not searched already. # If the detail is not relocated, add the id. self.searched_specific_files = set() # When we search exact paths without expanding downwards, we record # that here. self.searched_exact_paths = set() self.search_specific_files = search_specific_files # The parents up to the root of the paths we are searching. # After all normal paths are returned, these specific items are returned. self.search_specific_file_parents = set() # The ids we've sent out in the delta. self.seen_ids = set() self.state = state self.source_index = source_index self.target_index = target_index if target_index != 0: # A lot of code in here depends on target_index == 0 raise errors.BzrError('unsupported target index') self.want_unversioned = want_unversioned self.tree = tree def _process_entry(self, entry, path_info, pathjoin=osutils.pathjoin): """Compare an entry and real disk to generate delta information. :param path_info: top_relpath, basename, kind, lstat, abspath for the path of entry. 
If None, then the path is considered absent in the target (Perhaps we should pass in a concrete entry for this ?) Basename is returned as a utf8 string because we expect this tuple will be ignored, and don't want to take the time to decode. :return: (iter_changes_result, changed). If the entry has not been handled then changed is None. Otherwise it is False if no content or metadata changes have occurred, and True if any content or metadata change has occurred. If self.include_unchanged is True then if changed is not None, iter_changes_result will always be a result tuple. Otherwise, iter_changes_result is None unless changed is True. """ if self.source_index is None: source_details = DirState.NULL_PARENT_DETAILS else: source_details = entry[1][self.source_index] target_details = entry[1][self.target_index] target_minikind = target_details[0] if path_info is not None and target_minikind in 'fdlt': if not (self.target_index == 0): raise AssertionError() link_or_sha1 = update_entry(self.state, entry, abspath=path_info[4], stat_value=path_info[3]) # The entry may have been modified by update_entry target_details = entry[1][self.target_index] target_minikind = target_details[0] else: link_or_sha1 = None file_id = entry[0][2] source_minikind = source_details[0] if source_minikind in 'fdltr' and target_minikind in 'fdlt': # claimed content in both: diff # r | fdlt | | add source to search, add id path move and perform # | | | diff check on source-target # r | fdlt | a | dangling file that was present in the basis. # | | | ??? if source_minikind in 'r': # add the source to the search path to find any children it # has. TODO ? : only add if it is a container ? if not osutils.is_inside_any(self.searched_specific_files, source_details[1]): self.search_specific_files.add(source_details[1]) # generate the old path; this is needed for stating later # as well. old_path = source_details[1] old_dirname, old_basename = os.path.split(old_path) path = pathjoin(entry[0][0], entry[0][1]) old_entry = self.state._get_entry(self.source_index, path_utf8=old_path) # update the source details variable to be the real # location. if old_entry == (None, None): raise errors.CorruptDirstate(self.state._filename, "entry '%s/%s' is considered renamed from %r" " but source does not exist\n" "entry: %s" % (entry[0][0], entry[0][1], old_path, entry)) source_details = old_entry[1][self.source_index] source_minikind = source_details[0] else: old_dirname = entry[0][0] old_basename = entry[0][1] old_path = path = None if path_info is None: # the file is missing on disk, show as removed. content_change = True target_kind = None target_exec = False else: # source and target are both versioned and disk file is present. target_kind = path_info[2] if target_kind == 'directory': if path is None: old_path = path = pathjoin(old_dirname, old_basename) self.new_dirname_to_file_id[path] = file_id if source_minikind != 'd': content_change = True else: # directories have no fingerprint content_change = False target_exec = False elif target_kind == 'file': if source_minikind != 'f': content_change = True else: # Check the sha. 
We can't just rely on the size as # content filtering may mean differ sizes actually # map to the same content if link_or_sha1 is None: # Stat cache miss: statvalue, link_or_sha1 = \ self.state._sha1_provider.stat_and_sha1( path_info[4]) self.state._observed_sha1(entry, link_or_sha1, statvalue) content_change = (link_or_sha1 != source_details[1]) # Target details is updated at update_entry time if self.use_filesystem_for_exec: # We don't need S_ISREG here, because we are sure # we are dealing with a file. target_exec = bool(stat.S_IEXEC & path_info[3].st_mode) else: target_exec = target_details[3] elif target_kind == 'symlink': if source_minikind != 'l': content_change = True else: content_change = (link_or_sha1 != source_details[1]) target_exec = False elif target_kind == 'tree-reference': if source_minikind != 't': content_change = True else: content_change = False target_exec = False else: if path is None: path = pathjoin(old_dirname, old_basename) raise errors.BadFileKindError(path, path_info[2]) if source_minikind == 'd': if path is None: old_path = path = pathjoin(old_dirname, old_basename) self.old_dirname_to_file_id[old_path] = file_id # parent id is the entry for the path in the target tree if old_basename and old_dirname == self.last_source_parent[0]: source_parent_id = self.last_source_parent[1] else: try: source_parent_id = self.old_dirname_to_file_id[old_dirname] except KeyError: source_parent_entry = self.state._get_entry(self.source_index, path_utf8=old_dirname) source_parent_id = source_parent_entry[0][2] if source_parent_id == entry[0][2]: # This is the root, so the parent is None source_parent_id = None else: self.last_source_parent[0] = old_dirname self.last_source_parent[1] = source_parent_id new_dirname = entry[0][0] if entry[0][1] and new_dirname == self.last_target_parent[0]: target_parent_id = self.last_target_parent[1] else: try: target_parent_id = self.new_dirname_to_file_id[new_dirname] except KeyError: # TODO: We don't always need to do the lookup, because the # parent entry will be the same as the source entry. 
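# A minimal illustrative sketch (invented names) of the one-slot parent cache
# pattern used in this method: entries arrive grouped by directory, so
# remembering the last directory's file id avoids a dirstate lookup for
# nearly every sibling.
#
#   last_parent = [None, None]          # [dirname, parent_file_id]
#   def _demo_parent_id(dirname, lookup):
#       if dirname == last_parent[0]:
#           return last_parent[1]
#       parent_id = lookup(dirname)     # the expensive path
#       last_parent[0], last_parent[1] = dirname, parent_id
#       return parent_id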
target_parent_entry = self.state._get_entry(self.target_index, path_utf8=new_dirname) if target_parent_entry == (None, None): raise AssertionError( "Could not find target parent in wt: %s\nparent of: %s" % (new_dirname, entry)) target_parent_id = target_parent_entry[0][2] if target_parent_id == entry[0][2]: # This is the root, so the parent is None target_parent_id = None else: self.last_target_parent[0] = new_dirname self.last_target_parent[1] = target_parent_id source_exec = source_details[3] changed = (content_change or source_parent_id != target_parent_id or old_basename != entry[0][1] or source_exec != target_exec ) if not changed and not self.include_unchanged: return None, False else: if old_path is None: old_path = path = pathjoin(old_dirname, old_basename) old_path_u = self.utf8_decode(old_path)[0] path_u = old_path_u else: old_path_u = self.utf8_decode(old_path)[0] if old_path == path: path_u = old_path_u else: path_u = self.utf8_decode(path)[0] source_kind = DirState._minikind_to_kind[source_minikind] return (entry[0][2], (old_path_u, path_u), content_change, (True, True), (source_parent_id, target_parent_id), (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]), (source_kind, target_kind), (source_exec, target_exec)), changed elif source_minikind in 'a' and target_minikind in 'fdlt': # looks like a new file path = pathjoin(entry[0][0], entry[0][1]) # parent id is the entry for the path in the target tree # TODO: these are the same for an entire directory: cache em. parent_id = self.state._get_entry(self.target_index, path_utf8=entry[0][0])[0][2] if parent_id == entry[0][2]: parent_id = None if path_info is not None: # Present on disk: if self.use_filesystem_for_exec: # We need S_ISREG here, because we aren't sure if this # is a file or not. target_exec = bool( stat.S_ISREG(path_info[3].st_mode) and stat.S_IEXEC & path_info[3].st_mode) else: target_exec = target_details[3] return (entry[0][2], (None, self.utf8_decode(path)[0]), True, (False, True), (None, parent_id), (None, self.utf8_decode(entry[0][1])[0]), (None, path_info[2]), (None, target_exec)), True else: # Its a missing file, report it as such. return (entry[0][2], (None, self.utf8_decode(path)[0]), False, (False, True), (None, parent_id), (None, self.utf8_decode(entry[0][1])[0]), (None, None), (None, False)), True elif source_minikind in 'fdlt' and target_minikind in 'a': # unversioned, possibly, or possibly not deleted: we dont care. # if its still on disk, *and* theres no other entry at this # path [we dont know this in this routine at the moment - # perhaps we should change this - then it would be an unknown. old_path = pathjoin(entry[0][0], entry[0][1]) # parent id is the entry for the path in the target tree parent_id = self.state._get_entry(self.source_index, path_utf8=entry[0][0])[0][2] if parent_id == entry[0][2]: parent_id = None return (entry[0][2], (self.utf8_decode(old_path)[0], None), True, (True, False), (parent_id, None), (self.utf8_decode(entry[0][1])[0], None), (DirState._minikind_to_kind[source_minikind], None), (source_details[3], None)), True elif source_minikind in 'fdlt' and target_minikind in 'r': # a rename; could be a true rename, or a rename inherited from # a renamed parent. TODO: handle this efficiently. Its not # common case to rename dirs though, so a correct but slow # implementation will do. 
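# A minimal illustrative sketch of the shape of the tuples returned by
# _process_entry above (ids and paths invented):
#
#   (file_id,
#    (old_path, new_path),
#    content_changed,
#    (old_versioned, new_versioned),
#    (old_parent_id, new_parent_id),
#    (old_name, new_name),
#    (old_kind, new_kind),
#    (old_executable, new_executable))
#
#   # e.g. a newly added file 'doc/new.txt':
#   ('file-id-9', (None, u'doc/new.txt'), True, (False, True),
#    (None, 'doc-id'), (None, u'new.txt'), (None, 'file'), (None, False))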
if not osutils.is_inside_any(self.searched_specific_files, target_details[1]): self.search_specific_files.add(target_details[1]) elif source_minikind in 'ra' and target_minikind in 'ra': # neither of the selected trees contain this file, # so skip over it. This is not currently directly tested, but # is indirectly via test_too_much.TestCommands.test_conflicts. pass else: raise AssertionError("don't know how to compare " "source_minikind=%r, target_minikind=%r" % (source_minikind, target_minikind)) return None, None def __iter__(self): return self def _gather_result_for_consistency(self, result): """Check a result we will yield to make sure we are consistent later. This gathers result's parents into a set to output later. :param result: A result tuple. """ if not self.partial or not result[0]: return self.seen_ids.add(result[0]) new_path = result[1][1] if new_path: # Not the root and not a delete: queue up the parents of the path. self.search_specific_file_parents.update( osutils.parent_directories(new_path.encode('utf8'))) # Add the root directory which parent_directories does not # provide. self.search_specific_file_parents.add('') def iter_changes(self): """Iterate over the changes.""" utf8_decode = cache_utf8._utf8_decode _cmp_by_dirs = cmp_by_dirs _process_entry = self._process_entry search_specific_files = self.search_specific_files searched_specific_files = self.searched_specific_files splitpath = osutils.splitpath # sketch: # compare source_index and target_index at or under each element of search_specific_files. # follow the following comparison table. Note that we only want to do diff operations when # the target is fdl because thats when the walkdirs logic will have exposed the pathinfo # for the target. # cases: # # Source | Target | disk | action # r | fdlt | | add source to search, add id path move and perform # | | | diff check on source-target # r | fdlt | a | dangling file that was present in the basis. # | | | ??? # r | a | | add source to search # r | a | a | # r | r | | this path is present in a non-examined tree, skip. # r | r | a | this path is present in a non-examined tree, skip. # a | fdlt | | add new id # a | fdlt | a | dangling locally added file, skip # a | a | | not present in either tree, skip # a | a | a | not present in any tree, skip # a | r | | not present in either tree at this path, skip as it # | | | may not be selected by the users list of paths. # a | r | a | not present in either tree at this path, skip as it # | | | may not be selected by the users list of paths. # fdlt | fdlt | | content in both: diff them # fdlt | fdlt | a | deleted locally, but not unversioned - show as deleted ? # fdlt | a | | unversioned: output deleted id for now # fdlt | a | a | unversioned and deleted: output deleted id # fdlt | r | | relocated in this tree, so add target to search. # | | | Dont diff, we will see an r,fd; pair when we reach # | | | this id at the other path. # fdlt | r | a | relocated in this tree, so add target to search. # | | | Dont diff, we will see an r,fd; pair when we reach # | | | this id at the other path. # TODO: jam 20070516 - Avoid the _get_entry lookup overhead by # keeping a cache of directories that we have seen. while search_specific_files: # TODO: the pending list should be lexically sorted? the # interface doesn't require it. 
current_root = search_specific_files.pop() current_root_unicode = current_root.decode('utf8') searched_specific_files.add(current_root) # process the entries for this containing directory: the rest will be # found by their parents recursively. root_entries = self.state._entries_for_path(current_root) root_abspath = self.tree.abspath(current_root_unicode) try: root_stat = os.lstat(root_abspath) except OSError, e: if e.errno == errno.ENOENT: # the path does not exist: let _process_entry know that. root_dir_info = None else: # some other random error: hand it up. raise else: root_dir_info = ('', current_root, osutils.file_kind_from_stat_mode(root_stat.st_mode), root_stat, root_abspath) if root_dir_info[2] == 'directory': if self.tree._directory_is_tree_reference( current_root.decode('utf8')): root_dir_info = root_dir_info[:2] + \ ('tree-reference',) + root_dir_info[3:] if not root_entries and not root_dir_info: # this specified path is not present at all, skip it. continue path_handled = False for entry in root_entries: result, changed = _process_entry(entry, root_dir_info) if changed is not None: path_handled = True if changed: self._gather_result_for_consistency(result) if changed or self.include_unchanged: yield result if self.want_unversioned and not path_handled and root_dir_info: new_executable = bool( stat.S_ISREG(root_dir_info[3].st_mode) and stat.S_IEXEC & root_dir_info[3].st_mode) yield (None, (None, current_root_unicode), True, (False, False), (None, None), (None, splitpath(current_root_unicode)[-1]), (None, root_dir_info[2]), (None, new_executable) ) initial_key = (current_root, '', '') block_index, _ = self.state._find_block_index_from_key(initial_key) if block_index == 0: # we have processed the total root already, but because the # initial key matched it we should skip it here. block_index +=1 if root_dir_info and root_dir_info[2] == 'tree-reference': current_dir_info = None else: dir_iterator = osutils._walkdirs_utf8(root_abspath, prefix=current_root) try: current_dir_info = dir_iterator.next() except OSError, e: # on win32, python2.4 has e.errno == ERROR_DIRECTORY, but # python 2.5 has e.errno == EINVAL, # and e.winerror == ERROR_DIRECTORY e_winerror = getattr(e, 'winerror', None) win_errors = (ERROR_DIRECTORY, ERROR_PATH_NOT_FOUND) # there may be directories in the inventory even though # this path is not a file on disk: so mark it as end of # iterator if e.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL): current_dir_info = None elif (sys.platform == 'win32' and (e.errno in win_errors or e_winerror in win_errors)): current_dir_info = None else: raise else: if current_dir_info[0][0] == '': # remove .bzr from iteration bzr_index = bisect.bisect_left(current_dir_info[1], ('.bzr',)) if current_dir_info[1][bzr_index][0] != '.bzr': raise AssertionError() del current_dir_info[1][bzr_index] # walk until both the directory listing and the versioned metadata # are exhausted. if (block_index < len(self.state._dirblocks) and osutils.is_inside(current_root, self.state._dirblocks[block_index][0])): current_block = self.state._dirblocks[block_index] else: current_block = None while (current_dir_info is not None or current_block is not None): if (current_dir_info and current_block and current_dir_info[0][0] != current_block[0]): if _cmp_by_dirs(current_dir_info[0][0], current_block[0]) < 0: # filesystem data refers to paths not covered by the dirblock. # this has two possibilities: # A) it is versioned but empty, so there is no block for it # B) it is not versioned. 
# if (A) then we need to recurse into it to check for # new unknown files or directories. # if (B) then we should ignore it, because we don't # recurse into unknown directories. path_index = 0 while path_index < len(current_dir_info[1]): current_path_info = current_dir_info[1][path_index] if self.want_unversioned: if current_path_info[2] == 'directory': if self.tree._directory_is_tree_reference( current_path_info[0].decode('utf8')): current_path_info = current_path_info[:2] + \ ('tree-reference',) + current_path_info[3:] new_executable = bool( stat.S_ISREG(current_path_info[3].st_mode) and stat.S_IEXEC & current_path_info[3].st_mode) yield (None, (None, utf8_decode(current_path_info[0])[0]), True, (False, False), (None, None), (None, utf8_decode(current_path_info[1])[0]), (None, current_path_info[2]), (None, new_executable)) # dont descend into this unversioned path if it is # a dir if current_path_info[2] in ('directory', 'tree-reference'): del current_dir_info[1][path_index] path_index -= 1 path_index += 1 # This dir info has been handled, go to the next try: current_dir_info = dir_iterator.next() except StopIteration: current_dir_info = None else: # We have a dirblock entry for this location, but there # is no filesystem path for this. This is most likely # because a directory was removed from the disk. # We don't have to report the missing directory, # because that should have already been handled, but we # need to handle all of the files that are contained # within. for current_entry in current_block[1]: # entry referring to file not present on disk. # advance the entry only, after processing. result, changed = _process_entry(current_entry, None) if changed is not None: if changed: self._gather_result_for_consistency(result) if changed or self.include_unchanged: yield result block_index +=1 if (block_index < len(self.state._dirblocks) and osutils.is_inside(current_root, self.state._dirblocks[block_index][0])): current_block = self.state._dirblocks[block_index] else: current_block = None continue entry_index = 0 if current_block and entry_index < len(current_block[1]): current_entry = current_block[1][entry_index] else: current_entry = None advance_entry = True path_index = 0 if current_dir_info and path_index < len(current_dir_info[1]): current_path_info = current_dir_info[1][path_index] if current_path_info[2] == 'directory': if self.tree._directory_is_tree_reference( current_path_info[0].decode('utf8')): current_path_info = current_path_info[:2] + \ ('tree-reference',) + current_path_info[3:] else: current_path_info = None advance_path = True path_handled = False while (current_entry is not None or current_path_info is not None): if current_entry is None: # the check for path_handled when the path is advanced # will yield this path if needed. pass elif current_path_info is None: # no path is fine: the per entry code will handle it. result, changed = _process_entry(current_entry, current_path_info) if changed is not None: if changed: self._gather_result_for_consistency(result) if changed or self.include_unchanged: yield result elif (current_entry[0][1] != current_path_info[1] or current_entry[1][self.target_index][0] in 'ar'): # The current path on disk doesn't match the dirblock # record. Either the dirblock is marked as absent, or # the file on disk is not present at all in the # dirblock. Either way, report about the dirblock # entry, and let other code handle the filesystem one. 
# Compare the basename for these files to determine # which comes first if current_path_info[1] < current_entry[0][1]: # extra file on disk: pass for now, but only # increment the path, not the entry advance_entry = False else: # entry referring to file not present on disk. # advance the entry only, after processing. result, changed = _process_entry(current_entry, None) if changed is not None: if changed: self._gather_result_for_consistency(result) if changed or self.include_unchanged: yield result advance_path = False else: result, changed = _process_entry(current_entry, current_path_info) if changed is not None: path_handled = True if changed: self._gather_result_for_consistency(result) if changed or self.include_unchanged: yield result if advance_entry and current_entry is not None: entry_index += 1 if entry_index < len(current_block[1]): current_entry = current_block[1][entry_index] else: current_entry = None else: advance_entry = True # reset the advance flaga if advance_path and current_path_info is not None: if not path_handled: # unversioned in all regards if self.want_unversioned: new_executable = bool( stat.S_ISREG(current_path_info[3].st_mode) and stat.S_IEXEC & current_path_info[3].st_mode) try: relpath_unicode = utf8_decode(current_path_info[0])[0] except UnicodeDecodeError: raise errors.BadFilenameEncoding( current_path_info[0], osutils._fs_enc) yield (None, (None, relpath_unicode), True, (False, False), (None, None), (None, utf8_decode(current_path_info[1])[0]), (None, current_path_info[2]), (None, new_executable)) # dont descend into this unversioned path if it is # a dir if current_path_info[2] in ('directory'): del current_dir_info[1][path_index] path_index -= 1 # dont descend the disk iterator into any tree # paths. if current_path_info[2] == 'tree-reference': del current_dir_info[1][path_index] path_index -= 1 path_index += 1 if path_index < len(current_dir_info[1]): current_path_info = current_dir_info[1][path_index] if current_path_info[2] == 'directory': if self.tree._directory_is_tree_reference( current_path_info[0].decode('utf8')): current_path_info = current_path_info[:2] + \ ('tree-reference',) + current_path_info[3:] else: current_path_info = None path_handled = False else: advance_path = True # reset the advance flagg. if current_block is not None: block_index += 1 if (block_index < len(self.state._dirblocks) and osutils.is_inside(current_root, self.state._dirblocks[block_index][0])): current_block = self.state._dirblocks[block_index] else: current_block = None if current_dir_info is not None: try: current_dir_info = dir_iterator.next() except StopIteration: current_dir_info = None for result in self._iter_specific_file_parents(): yield result def _iter_specific_file_parents(self): """Iter over the specific file parents.""" while self.search_specific_file_parents: # Process the parent directories for the paths we were iterating. # Even in extremely large trees this should be modest, so currently # no attempt is made to optimise. path_utf8 = self.search_specific_file_parents.pop() if osutils.is_inside_any(self.searched_specific_files, path_utf8): # We've examined this path. continue if path_utf8 in self.searched_exact_paths: # We've examined this path. continue path_entries = self.state._entries_for_path(path_utf8) # We need either one or two entries. If the path in # self.target_index has moved (so the entry in source_index is in # 'ar') then we need to also look for the entry for this path in # self.source_index, to output the appropriate delete-or-rename. 
selected_entries = [] found_item = False for candidate_entry in path_entries: # Find entries present in target at this path: if candidate_entry[1][self.target_index][0] not in 'ar': found_item = True selected_entries.append(candidate_entry) # Find entries present in source at this path: elif (self.source_index is not None and candidate_entry[1][self.source_index][0] not in 'ar'): found_item = True if candidate_entry[1][self.target_index][0] == 'a': # Deleted, emit it here. selected_entries.append(candidate_entry) else: # renamed, emit it when we process the directory it # ended up at. self.search_specific_file_parents.add( candidate_entry[1][self.target_index][1]) if not found_item: raise AssertionError( "Missing entry for specific path parent %r, %r" % ( path_utf8, path_entries)) path_info = self._path_info(path_utf8, path_utf8.decode('utf8')) for entry in selected_entries: if entry[0][2] in self.seen_ids: continue result, changed = self._process_entry(entry, path_info) if changed is None: raise AssertionError( "Got entry<->path mismatch for specific path " "%r entry %r path_info %r " % ( path_utf8, entry, path_info)) # Only include changes - we're outside the users requested # expansion. if changed: self._gather_result_for_consistency(result) if (result[6][0] == 'directory' and result[6][1] != 'directory'): # This stopped being a directory, the old children have # to be included. if entry[1][self.source_index][0] == 'r': # renamed, take the source path entry_path_utf8 = entry[1][self.source_index][1] else: entry_path_utf8 = path_utf8 initial_key = (entry_path_utf8, '', '') block_index, _ = self.state._find_block_index_from_key( initial_key) if block_index == 0: # The children of the root are in block index 1. block_index +=1 current_block = None if block_index < len(self.state._dirblocks): current_block = self.state._dirblocks[block_index] if not osutils.is_inside( entry_path_utf8, current_block[0]): # No entries for this directory at all. current_block = None if current_block is not None: for entry in current_block[1]: if entry[1][self.source_index][0] in 'ar': # Not in the source tree, so doesn't have to be # included. continue # Path of the entry itself. self.search_specific_file_parents.add( osutils.pathjoin(*entry[0][:2])) if changed or self.include_unchanged: yield result self.searched_exact_paths.add(path_utf8) def _path_info(self, utf8_path, unicode_path): """Generate path_info for unicode_path. :return: None if unicode_path does not exist, or a path_info tuple. """ abspath = self.tree.abspath(unicode_path) try: stat = os.lstat(abspath) except OSError, e: if e.errno == errno.ENOENT: # the path does not exist. 
return None else: raise utf8_basename = utf8_path.rsplit('/', 1)[-1] dir_info = (utf8_path, utf8_basename, osutils.file_kind_from_stat_mode(stat.st_mode), stat, abspath) if dir_info[2] == 'directory': if self.tree._directory_is_tree_reference( unicode_path): self.root_dir_info = self.root_dir_info[:2] + \ ('tree-reference',) + self.root_dir_info[3:] return dir_info # Try to load the compiled form if possible try: from bzrlib._dirstate_helpers_pyx import ( _read_dirblocks, bisect_dirblock, _bisect_path_left, _bisect_path_right, cmp_by_dirs, pack_stat, ProcessEntryC as _process_entry, update_entry as update_entry, ) except ImportError, e: osutils.failed_to_load_extension(e) from bzrlib._dirstate_helpers_py import ( _read_dirblocks, bisect_dirblock, _bisect_path_left, _bisect_path_right, cmp_by_dirs, pack_stat, ) # FIXME: It would be nice to be able to track moved lines so that the # corresponding python code can be moved to the _dirstate_helpers_py # module. I don't want to break the history for this important piece of # code so I left the code here -- vila 20090622 update_entry = py_update_entry _process_entry = ProcessEntryPython bzr-2.7.0/bzrlib/doc/0000755000000000000000000000000010353130175012472 5ustar 00000000000000bzr-2.7.0/bzrlib/doc_generate/0000755000000000000000000000000011215617543014353 5ustar 00000000000000bzr-2.7.0/bzrlib/email_message.py0000644000000000000000000002014712321177047015104 0ustar 00000000000000# Copyright (C) 2007 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """A convenience class around email.Message and email.MIMEMultipart.""" from __future__ import absolute_import from email import ( Header, Message, MIMEMultipart, MIMEText, Utils, ) from bzrlib import __version__ as _bzrlib_version from bzrlib.osutils import safe_unicode from bzrlib.smtp_connection import SMTPConnection class EmailMessage(object): """An email message. The constructor needs an origin address, a destination address or addresses and a subject, and accepts a body as well. Add additional parts to the message with add_inline_attachment(). Retrieve the entire formatted message with as_string(). Headers can be accessed with get() and msg[], and modified with msg[] =. """ def __init__(self, from_address, to_address, subject, body=None): """Create an email message. :param from_address: The origin address, to be put on the From header. :param to_address: The destination address of the message, to be put in the To header. Can also be a list of addresses. :param subject: The subject of the message. :param body: If given, the body of the message. All four parameters can be unicode strings or byte strings, but for the addresses and subject byte strings must be encoded in UTF-8. For the body any byte string will be accepted; if it's not ASCII or UTF-8, it'll be sent with charset=8-bit. 
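# A minimal illustrative sketch (invented addresses and content) of typical
# use of the EmailMessage class described above.
#
#   msg = EmailMessage('maria@example.com', 'jrandom@example.com',
#                      'Review request', body='Please take a look.')
#   msg.add_inline_attachment('patch text here', filename='fix.patch')
#   msg['In-Reply-To'] = '<12345@example.com>'   # headers via msg[...] = value
#   text = msg.as_string()                       # the full formatted message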
""" self._headers = {} self._body = body self._parts = [] if isinstance(to_address, basestring): to_address = [ to_address ] to_addresses = [] for addr in to_address: to_addresses.append(self.address_to_encoded_header(addr)) self._headers['To'] = ', '.join(to_addresses) self._headers['From'] = self.address_to_encoded_header(from_address) self._headers['Subject'] = Header.Header(safe_unicode(subject)) self._headers['User-Agent'] = 'Bazaar (%s)' % _bzrlib_version def add_inline_attachment(self, body, filename=None, mime_subtype='plain'): """Add an inline attachment to the message. :param body: A text to attach. Can be an unicode string or a byte string, and it'll be sent as ascii, utf-8, or 8-bit, in that preferred order. :param filename: The name for the attachment. This will give a default name for email programs to save the attachment. :param mime_subtype: MIME subtype of the attachment (eg. 'plain' for text/plain [default]). The attachment body will be displayed inline, so do not use this function to attach binary attachments. """ # add_inline_attachment() has been called, so the message will be a # MIMEMultipart; add the provided body, if any, as the first attachment if self._body is not None: self._parts.append((self._body, None, 'plain')) self._body = None self._parts.append((body, filename, mime_subtype)) def as_string(self, boundary=None): """Return the entire formatted message as a string. :param boundary: The boundary to use between MIME parts, if applicable. Used for tests. """ if not self._parts: msgobj = Message.Message() if self._body is not None: body, encoding = self.string_with_encoding(self._body) msgobj.set_payload(body, encoding) else: msgobj = MIMEMultipart.MIMEMultipart() if boundary is not None: msgobj.set_boundary(boundary) for body, filename, mime_subtype in self._parts: body, encoding = self.string_with_encoding(body) payload = MIMEText.MIMEText(body, mime_subtype, encoding) if filename is not None: content_type = payload['Content-Type'] content_type += '; name="%s"' % filename payload.replace_header('Content-Type', content_type) payload['Content-Disposition'] = 'inline' msgobj.attach(payload) # sort headers here to ease testing for header, value in sorted(self._headers.items()): msgobj[header] = value return msgobj.as_string() __str__ = as_string def get(self, header, failobj=None): """Get a header from the message, returning failobj if not present.""" return self._headers.get(header, failobj) def __getitem__(self, header): """Get a header from the message, returning None if not present. This method intentionally does not raise KeyError to mimic the behavior of __getitem__ in email.Message. """ return self._headers.get(header, None) def __setitem__(self, header, value): return self._headers.__setitem__(header, value) @staticmethod def send(config, from_address, to_address, subject, body, attachment=None, attachment_filename=None, attachment_mime_subtype='plain'): """Create an email message and send it with SMTPConnection. :param config: config object to pass to SMTPConnection constructor. See EmailMessage.__init__() and EmailMessage.add_inline_attachment() for an explanation of the rest of parameters. """ msg = EmailMessage(from_address, to_address, subject, body) if attachment is not None: msg.add_inline_attachment(attachment, attachment_filename, attachment_mime_subtype) SMTPConnection(config).send_email(msg) @staticmethod def address_to_encoded_header(address): """RFC2047-encode an address if necessary. :param address: An unicode string, or UTF-8 byte string. 
:return: A possibly RFC2047-encoded string. """ # Can't call Header over all the address, because that encodes both the # name and the email address, which is not permitted by RFCs. user, email = Utils.parseaddr(address) if not user: return email else: return Utils.formataddr((str(Header.Header(safe_unicode(user))), email)) @staticmethod def string_with_encoding(string_): """Return a str object together with an encoding. :param string\\_: A str or unicode object. :return: A tuple (str, encoding), where encoding is one of 'ascii', 'utf-8', or '8-bit', in that preferred order. """ # Python's email module base64-encodes the body whenever the charset is # not explicitly set to ascii. Because of this, and because we want to # avoid base64 when it's not necessary in order to be most compatible # with the capabilities of the receiving side, we check with encode() # and decode() whether the body is actually ascii-only. if isinstance(string_, unicode): try: return (string_.encode('ascii'), 'ascii') except UnicodeEncodeError: return (string_.encode('utf-8'), 'utf-8') else: try: string_.decode('ascii') return (string_, 'ascii') except UnicodeDecodeError: try: string_.decode('utf-8') return (string_, 'utf-8') except UnicodeDecodeError: return (string_, '8-bit') bzr-2.7.0/bzrlib/errors.py0000644000000000000000000025470112652143721013631 0ustar 00000000000000# Copyright (C) 2005-2013, 2016 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Exceptions for bzr, and reporting of them. """ from __future__ import absolute_import # TODO: is there any value in providing the .args field used by standard # python exceptions? A list of values with no names seems less useful # to me. # TODO: Perhaps convert the exception to a string at the moment it's # constructed to make sure it will succeed. But that says nothing about # exceptions that are never raised. # TODO: selftest assertRaises should probably also check that every error # raised can be formatted as a string successfully, and without giving # 'unprintable'. # return codes from the bzr program EXIT_OK = 0 EXIT_ERROR = 3 EXIT_INTERNAL_ERROR = 4 class BzrError(StandardError): """ Base class for errors raised by bzrlib. :cvar internal_error: if True this was probably caused by a bzr bug and should be displayed with a traceback; if False (or absent) this was probably a user or environment error and they don't need the gory details. (That can be overridden by -Derror on the command line.) :cvar _fmt: Format string to display the error; this is expanded by the instance's dict. """ internal_error = False def __init__(self, msg=None, **kwds): """Construct a new BzrError. There are two alternative forms for constructing these objects. Either a preformatted string may be passed, or a set of named arguments can be given. 
The first is for generic "user" errors which are not intended to be caught and so do not need a specific subclass. The second case is for use with subclasses that provide a _fmt format string to print the arguments. Keyword arguments are taken as parameters to the error, which can be inserted into the format string template. It's recommended that subclasses override the __init__ method to require specific parameters. :param msg: If given, this is the literal complete text for the error, not subject to expansion. 'msg' is used instead of 'message' because python evolved and, in 2.6, forbids the use of 'message'. """ StandardError.__init__(self) if msg is not None: # I was going to deprecate this, but it actually turns out to be # quite handy - mbp 20061103. self._preformatted_string = msg else: self._preformatted_string = None for key, value in kwds.items(): setattr(self, key, value) def _format(self): s = getattr(self, '_preformatted_string', None) if s is not None: # contains a preformatted message return s try: fmt = self._get_format_string() if fmt: d = dict(self.__dict__) s = fmt % d # __str__() should always return a 'str' object # never a 'unicode' object. return s except Exception, e: pass # just bind to 'e' for formatting below else: e = None return 'Unprintable exception %s: dict=%r, fmt=%r, error=%r' \ % (self.__class__.__name__, self.__dict__, getattr(self, '_fmt', None), e) def __unicode__(self): u = self._format() if isinstance(u, str): # Try decoding the str using the default encoding. u = unicode(u) elif not isinstance(u, unicode): # Try to make a unicode object from it, because __unicode__ must # return a unicode object. u = unicode(u) return u def __str__(self): s = self._format() if isinstance(s, unicode): s = s.encode('utf8') else: # __str__ must return a str. s = str(s) return s def __repr__(self): return '%s(%s)' % (self.__class__.__name__, str(self)) def _get_format_string(self): """Return format string for this exception or None""" fmt = getattr(self, '_fmt', None) if fmt is not None: from bzrlib.i18n import gettext return gettext(unicode(fmt)) # _fmt strings should be ascii def __eq__(self, other): if self.__class__ is not other.__class__: return NotImplemented return self.__dict__ == other.__dict__ class InternalBzrError(BzrError): """Base class for errors that are internal in nature. This is a convenience class for errors that are internal. The internal_error attribute can still be altered in subclasses, if needed. Using this class is simply an easy way to get internal errors. """ internal_error = True class AlreadyBuilding(BzrError): _fmt = "The tree builder is already building a tree." class BranchError(BzrError): """Base class for concrete 'errors about a branch'.""" def __init__(self, branch): BzrError.__init__(self, branch=branch) class BzrCheckError(InternalBzrError): _fmt = "Internal check failed: %(msg)s" def __init__(self, msg): BzrError.__init__(self) self.msg = msg class DirstateCorrupt(BzrError): _fmt = "The dirstate file (%(state)s) appears to be corrupt: %(msg)s" def __init__(self, state, msg): BzrError.__init__(self) self.state = state self.msg = msg class DisabledMethod(InternalBzrError): _fmt = "The smart server method '%(class_name)s' is disabled." def __init__(self, class_name): BzrError.__init__(self) self.class_name = class_name class IncompatibleAPI(BzrError): _fmt = 'The API for "%(api)s" is not compatible with "%(wanted)s". '\ 'It supports versions "%(minimum)s" to "%(current)s".' 
def __init__(self, api, wanted, minimum, current): self.api = api self.wanted = wanted self.minimum = minimum self.current = current class InProcessTransport(BzrError): _fmt = "The transport '%(transport)s' is only accessible within this " \ "process." def __init__(self, transport): self.transport = transport class InvalidEntryName(InternalBzrError): _fmt = "Invalid entry name: %(name)s" def __init__(self, name): BzrError.__init__(self) self.name = name class InvalidRevisionNumber(BzrError): _fmt = "Invalid revision number %(revno)s" def __init__(self, revno): BzrError.__init__(self) self.revno = revno class InvalidRevisionId(BzrError): _fmt = "Invalid revision-id {%(revision_id)s} in %(branch)s" def __init__(self, revision_id, branch): # branch can be any string or object with __str__ defined BzrError.__init__(self) self.revision_id = revision_id self.branch = branch class ReservedId(BzrError): _fmt = "Reserved revision-id {%(revision_id)s}" def __init__(self, revision_id): self.revision_id = revision_id class RootMissing(InternalBzrError): _fmt = ("The root entry of a tree must be the first entry supplied to " "the commit builder.") class NoPublicBranch(BzrError): _fmt = 'There is no public branch set for "%(branch_url)s".' def __init__(self, branch): import bzrlib.urlutils as urlutils public_location = urlutils.unescape_for_display(branch.base, 'ascii') BzrError.__init__(self, branch_url=public_location) class NoHelpTopic(BzrError): _fmt = ("No help could be found for '%(topic)s'. " "Please use 'bzr help topics' to obtain a list of topics.") def __init__(self, topic): self.topic = topic class NoSuchId(BzrError): _fmt = 'The file id "%(file_id)s" is not present in the tree %(tree)s.' def __init__(self, tree, file_id): BzrError.__init__(self) self.file_id = file_id self.tree = tree class NoSuchIdInRepository(NoSuchId): _fmt = ('The file id "%(file_id)s" is not present in the repository' ' %(repository)r') def __init__(self, repository, file_id): BzrError.__init__(self, repository=repository, file_id=file_id) class NotStacked(BranchError): _fmt = "The branch '%(branch)s' is not stacked." class InventoryModified(InternalBzrError): _fmt = ("The current inventory for the tree %(tree)r has been modified," " so a clean inventory cannot be read without data loss.") def __init__(self, tree): self.tree = tree class NoWorkingTree(BzrError): _fmt = 'No WorkingTree exists for "%(base)s".' def __init__(self, base): BzrError.__init__(self) self.base = base class NotBuilding(BzrError): _fmt = "Not currently building a tree." class NotLocalUrl(BzrError): _fmt = "%(url)s is not a local path." def __init__(self, url): self.url = url class WorkingTreeAlreadyPopulated(InternalBzrError): _fmt = 'Working tree already populated in "%(base)s"' def __init__(self, base): self.base = base class BzrCommandError(BzrError): """Error from user command""" # Error from malformed user command; please avoid raising this as a # generic exception not caused by user input. # # I think it's a waste of effort to differentiate between errors that # are not intended to be caught anyway. UI code need not subclass # BzrCommandError, and non-UI code should not throw a subclass of # BzrCommandError. 
ADHB 20051211 class NotWriteLocked(BzrError): _fmt = """%(not_locked)r is not write locked but needs to be.""" def __init__(self, not_locked): self.not_locked = not_locked class BzrOptionError(BzrCommandError): _fmt = "Error in command line options" class BadIndexFormatSignature(BzrError): _fmt = "%(value)s is not an index of type %(_type)s." def __init__(self, value, _type): BzrError.__init__(self) self.value = value self._type = _type class BadIndexData(BzrError): _fmt = "Error in data for index %(value)s." def __init__(self, value): BzrError.__init__(self) self.value = value class BadIndexDuplicateKey(BzrError): _fmt = "The key '%(key)s' is already in index '%(index)s'." def __init__(self, key, index): BzrError.__init__(self) self.key = key self.index = index class BadIndexKey(BzrError): _fmt = "The key '%(key)s' is not a valid key." def __init__(self, key): BzrError.__init__(self) self.key = key class BadIndexOptions(BzrError): _fmt = "Could not parse options for index %(value)s." def __init__(self, value): BzrError.__init__(self) self.value = value class BadIndexValue(BzrError): _fmt = "The value '%(value)s' is not a valid value." def __init__(self, value): BzrError.__init__(self) self.value = value class BadOptionValue(BzrError): _fmt = """Bad value "%(value)s" for option "%(name)s".""" def __init__(self, name, value): BzrError.__init__(self, name=name, value=value) class StrictCommitFailed(BzrError): _fmt = "Commit refused because there are unknown files in the tree" # XXX: Should be unified with TransportError; they seem to represent the # same thing # RBC 20060929: I think that unifiying with TransportError would be a mistake # - this is finer than a TransportError - and more useful as such. It # differentiates between 'transport has failed' and 'operation on a transport # has failed.' class PathError(BzrError): _fmt = "Generic path error: %(path)r%(extra)s)" def __init__(self, path, extra=None): BzrError.__init__(self) self.path = path if extra: self.extra = ': ' + str(extra) else: self.extra = '' class NoSuchFile(PathError): _fmt = "No such file: %(path)r%(extra)s" class FileExists(PathError): _fmt = "File exists: %(path)r%(extra)s" class RenameFailedFilesExist(BzrError): """Used when renaming and both source and dest exist.""" _fmt = ("Could not rename %(source)s => %(dest)s because both files exist." 
" (Use --after to tell bzr about a rename that has already" " happened)%(extra)s") def __init__(self, source, dest, extra=None): BzrError.__init__(self) self.source = str(source) self.dest = str(dest) if extra: self.extra = ' ' + str(extra) else: self.extra = '' class NotADirectory(PathError): _fmt = '"%(path)s" is not a directory %(extra)s' class NotInWorkingDirectory(PathError): _fmt = '"%(path)s" is not in the working directory %(extra)s' class DirectoryNotEmpty(PathError): _fmt = 'Directory not empty: "%(path)s"%(extra)s' class HardLinkNotSupported(PathError): _fmt = 'Hard-linking "%(path)s" is not supported' class ReadingCompleted(InternalBzrError): _fmt = ("The MediumRequest '%(request)s' has already had finish_reading " "called upon it - the request has been completed and no more " "data may be read.") def __init__(self, request): self.request = request class ResourceBusy(PathError): _fmt = 'Device or resource busy: "%(path)s"%(extra)s' class PermissionDenied(PathError): _fmt = 'Permission denied: "%(path)s"%(extra)s' class InvalidURL(PathError): _fmt = 'Invalid url supplied to transport: "%(path)s"%(extra)s' class InvalidURLJoin(PathError): _fmt = "Invalid URL join request: %(reason)s: %(base)r + %(join_args)r" def __init__(self, reason, base, join_args): self.reason = reason self.base = base self.join_args = join_args PathError.__init__(self, base, reason) class InvalidRebaseURLs(PathError): _fmt = "URLs differ by more than path: %(from_)r and %(to)r" def __init__(self, from_, to): self.from_ = from_ self.to = to PathError.__init__(self, from_, 'URLs differ by more than path.') class UnavailableRepresentation(InternalBzrError): _fmt = ("The encoding '%(wanted)s' is not available for key %(key)s which " "is encoded as '%(native)s'.") def __init__(self, key, wanted, native): InternalBzrError.__init__(self) self.wanted = wanted self.native = native self.key = key class UnknownHook(BzrError): _fmt = "The %(type)s hook '%(hook)s' is unknown in this version of bzrlib." def __init__(self, hook_type, hook_name): BzrError.__init__(self) self.type = hook_type self.hook = hook_name class UnsupportedProtocol(PathError): _fmt = 'Unsupported protocol for url "%(path)s"%(extra)s' def __init__(self, url, extra=""): PathError.__init__(self, url, extra=extra) class UnstackableBranchFormat(BzrError): _fmt = ("The branch '%(url)s'(%(format)s) is not a stackable format. " "You will need to upgrade the branch to permit branch stacking.") def __init__(self, format, url): BzrError.__init__(self) self.format = format self.url = url class UnstackableLocationError(BzrError): _fmt = "The branch '%(branch_url)s' cannot be stacked on '%(target_url)s'." def __init__(self, branch_url, target_url): BzrError.__init__(self) self.branch_url = branch_url self.target_url = target_url class UnstackableRepositoryFormat(BzrError): _fmt = ("The repository '%(url)s'(%(format)s) is not a stackable format. 
" "You will need to upgrade the repository to permit branch stacking.") def __init__(self, format, url): BzrError.__init__(self) self.format = format self.url = url class ReadError(PathError): _fmt = """Error reading from %(path)r.""" class ShortReadvError(PathError): _fmt = ('readv() read %(actual)s bytes rather than %(length)s bytes' ' at %(offset)s for "%(path)s"%(extra)s') internal_error = True def __init__(self, path, offset, length, actual, extra=None): PathError.__init__(self, path, extra=extra) self.offset = offset self.length = length self.actual = actual class PathNotChild(PathError): _fmt = 'Path "%(path)s" is not a child of path "%(base)s"%(extra)s' internal_error = False def __init__(self, path, base, extra=None): BzrError.__init__(self) self.path = path self.base = base if extra: self.extra = ': ' + str(extra) else: self.extra = '' class InvalidNormalization(PathError): _fmt = 'Path "%(path)s" is not unicode normalized' # TODO: This is given a URL; we try to unescape it but doing that from inside # the exception object is a bit undesirable. # TODO: Probably this behavior of should be a common superclass class NotBranchError(PathError): _fmt = 'Not a branch: "%(path)s"%(detail)s.' def __init__(self, path, detail=None, bzrdir=None): import bzrlib.urlutils as urlutils path = urlutils.unescape_for_display(path, 'ascii') if detail is not None: detail = ': ' + detail self.detail = detail self.bzrdir = bzrdir PathError.__init__(self, path=path) def __repr__(self): return '<%s %r>' % (self.__class__.__name__, self.__dict__) def _format(self): # XXX: Ideally self.detail would be a property, but Exceptions in # Python 2.4 have to be old-style classes so properties don't work. # Instead we override _format. if self.detail is None: if self.bzrdir is not None: try: self.bzrdir.open_repository() except NoRepositoryPresent: self.detail = '' except Exception: # Just ignore unexpected errors. Raising arbitrary errors # during str(err) can provoke strange bugs. Concretely # Launchpad's codehosting managed to raise NotBranchError # here, and then get stuck in an infinite loop/recursion # trying to str() that error. All this error really cares # about that there's no working repository there, and if # open_repository() fails, there probably isn't. self.detail = '' else: self.detail = ': location is a repository' else: self.detail = '' return PathError._format(self) class NoSubmitBranch(PathError): _fmt = 'No submit branch available for branch "%(path)s"' def __init__(self, branch): import bzrlib.urlutils as urlutils self.path = urlutils.unescape_for_display(branch.base, 'ascii') class AlreadyControlDirError(PathError): _fmt = 'A control directory already exists: "%(path)s".' class AlreadyBranchError(PathError): _fmt = 'Already a branch: "%(path)s".' class InvalidBranchName(PathError): _fmt = "Invalid branch name: %(name)s" def __init__(self, name): BzrError.__init__(self) self.name = name class ParentBranchExists(AlreadyBranchError): _fmt = 'Parent branch already exists: "%(path)s".' 
class BranchExistsWithoutWorkingTree(PathError): _fmt = 'Directory contains a branch, but no working tree \ (use bzr checkout if you wish to build a working tree): "%(path)s"' class AtomicFileAlreadyClosed(PathError): _fmt = ('"%(function)s" called on an AtomicFile after it was closed:' ' "%(path)s"') def __init__(self, path, function): PathError.__init__(self, path=path, extra=None) self.function = function class InaccessibleParent(PathError): _fmt = ('Parent not accessible given base "%(base)s" and' ' relative path "%(path)s"') def __init__(self, path, base): PathError.__init__(self, path) self.base = base class NoRepositoryPresent(BzrError): _fmt = 'No repository present: "%(path)s"' def __init__(self, bzrdir): BzrError.__init__(self) self.path = bzrdir.transport.clone('..').base class UnsupportedFormatError(BzrError): _fmt = "Unsupported branch format: %(format)s\nPlease run 'bzr upgrade'" class UnknownFormatError(BzrError): _fmt = "Unknown %(kind)s format: %(format)r" def __init__(self, format, kind='branch'): self.kind = kind self.format = format class IncompatibleFormat(BzrError): _fmt = "Format %(format)s is not compatible with .bzr version %(bzrdir)s." def __init__(self, format, bzrdir_format): BzrError.__init__(self) self.format = format self.bzrdir = bzrdir_format class ParseFormatError(BzrError): _fmt = "Parse error on line %(lineno)d of %(format)s format: %(line)s" def __init__(self, format, lineno, line, text): BzrError.__init__(self) self.format = format self.lineno = lineno self.line = line self.text = text class IncompatibleRepositories(BzrError): """Report an error that two repositories are not compatible. Note that the source and target repositories are permitted to be strings: this exception is thrown from the smart server and may refer to a repository the client hasn't opened. """ _fmt = "%(target)s\n" \ "is not compatible with\n" \ "%(source)s\n" \ "%(details)s" def __init__(self, source, target, details=None): if details is None: details = "(no details)" BzrError.__init__(self, target=target, source=source, details=details) class IncompatibleRevision(BzrError): _fmt = "Revision is not compatible with %(repo_format)s" def __init__(self, repo_format): BzrError.__init__(self) self.repo_format = repo_format class AlreadyVersionedError(BzrError): """Used when a path is expected not to be versioned, but it is.""" _fmt = "%(context_info)s%(path)s is already versioned." def __init__(self, path, context_info=None): """Construct a new AlreadyVersionedError. :param path: This is the path which is versioned, which should be in a user friendly form. :param context_info: If given, this is information about the context, which could explain why this is expected to not be versioned. """ BzrError.__init__(self) self.path = path if context_info is None: self.context_info = '' else: self.context_info = context_info + ". " class NotVersionedError(BzrError): """Used when a path is expected to be versioned, but it is not.""" _fmt = "%(context_info)s%(path)s is not versioned." def __init__(self, path, context_info=None): """Construct a new NotVersionedError. :param path: This is the path which is not versioned, which should be in a user friendly form. :param context_info: If given, this is information about the context, which could explain why this is expected to be versioned. """ BzrError.__init__(self) self.path = path if context_info is None: self.context_info = '' else: self.context_info = context_info + ". 
" class PathsNotVersionedError(BzrError): """Used when reporting several paths which are not versioned""" _fmt = "Path(s) are not versioned: %(paths_as_string)s" def __init__(self, paths): from bzrlib.osutils import quotefn BzrError.__init__(self) self.paths = paths self.paths_as_string = ' '.join([quotefn(p) for p in paths]) class PathsDoNotExist(BzrError): _fmt = "Path(s) do not exist: %(paths_as_string)s%(extra)s" # used when reporting that paths are neither versioned nor in the working # tree def __init__(self, paths, extra=None): # circular import from bzrlib.osutils import quotefn BzrError.__init__(self) self.paths = paths self.paths_as_string = ' '.join([quotefn(p) for p in paths]) if extra: self.extra = ': ' + str(extra) else: self.extra = '' class BadFileKindError(BzrError): _fmt = 'Cannot operate on "%(filename)s" of unsupported kind "%(kind)s"' def __init__(self, filename, kind): BzrError.__init__(self, filename=filename, kind=kind) class BadFilenameEncoding(BzrError): _fmt = ('Filename %(filename)r is not valid in your current filesystem' ' encoding %(fs_encoding)s') def __init__(self, filename, fs_encoding): BzrError.__init__(self) self.filename = filename self.fs_encoding = fs_encoding class ForbiddenControlFileError(BzrError): _fmt = 'Cannot operate on "%(filename)s" because it is a control file' class LockError(InternalBzrError): _fmt = "Lock error: %(msg)s" # All exceptions from the lock/unlock functions should be from # this exception class. They will be translated as necessary. The # original exception is available as e.original_error # # New code should prefer to raise specific subclasses def __init__(self, msg): self.msg = msg class LockActive(LockError): _fmt = "The lock for '%(lock_description)s' is in use and cannot be broken." internal_error = False def __init__(self, lock_description): self.lock_description = lock_description class CommitNotPossible(LockError): _fmt = "A commit was attempted but we do not have a write lock open." def __init__(self): pass class AlreadyCommitted(LockError): _fmt = "A rollback was requested, but is not able to be accomplished." def __init__(self): pass class ReadOnlyError(LockError): _fmt = "A write attempt was made in a read only transaction on %(obj)s" # TODO: There should also be an error indicating that you need a write # lock and don't have any lock at all... mbp 20070226 def __init__(self, obj): self.obj = obj class LockFailed(LockError): internal_error = False _fmt = "Cannot lock %(lock)s: %(why)s" def __init__(self, lock, why): LockError.__init__(self, '') self.lock = lock self.why = why class OutSideTransaction(BzrError): _fmt = ("A transaction related operation was attempted after" " the transaction finished.") class ObjectNotLocked(LockError): _fmt = "%(obj)r is not locked" # this can indicate that any particular object is not locked; see also # LockNotHeld which means that a particular *lock* object is not held by # the caller -- perhaps they should be unified. 
def __init__(self, obj): self.obj = obj class ReadOnlyObjectDirtiedError(ReadOnlyError): _fmt = "Cannot change object %(obj)r in read only transaction" def __init__(self, obj): self.obj = obj class UnlockableTransport(LockError): internal_error = False _fmt = "Cannot lock: transport is read only: %(transport)s" def __init__(self, transport): self.transport = transport class LockContention(LockError): _fmt = 'Could not acquire lock "%(lock)s": %(msg)s' internal_error = False def __init__(self, lock, msg=''): self.lock = lock self.msg = msg class LockBroken(LockError): _fmt = ("Lock was broken while still open: %(lock)s" " - check storage consistency!") internal_error = False def __init__(self, lock): self.lock = lock class LockBreakMismatch(LockError): _fmt = ("Lock was released and re-acquired before being broken:" " %(lock)s: held by %(holder)r, wanted to break %(target)r") internal_error = False def __init__(self, lock, holder, target): self.lock = lock self.holder = holder self.target = target class LockCorrupt(LockError): _fmt = ("Lock is apparently held, but corrupted: %(corruption_info)s\n" "Use 'bzr break-lock' to clear it") internal_error = False def __init__(self, corruption_info, file_data=None): self.corruption_info = corruption_info self.file_data = file_data class LockNotHeld(LockError): _fmt = "Lock not held: %(lock)s" internal_error = False def __init__(self, lock): self.lock = lock class TokenLockingNotSupported(LockError): _fmt = "The object %(obj)s does not support token specifying a token when locking." def __init__(self, obj): self.obj = obj class TokenMismatch(LockBroken): _fmt = "The lock token %(given_token)r does not match lock token %(lock_token)r." internal_error = True def __init__(self, given_token, lock_token): self.given_token = given_token self.lock_token = lock_token class PointlessCommit(BzrError): _fmt = "No changes to commit" class CannotCommitSelectedFileMerge(BzrError): _fmt = 'Selected-file commit of merges is not supported yet:'\ ' files %(files_str)s' def __init__(self, files): files_str = ', '.join(files) BzrError.__init__(self, files=files, files_str=files_str) class ExcludesUnsupported(BzrError): _fmt = ('Excluding paths during commit is not supported by ' 'repository at %(repository)r.') def __init__(self, repository): BzrError.__init__(self, repository=repository) class BadCommitMessageEncoding(BzrError): _fmt = 'The specified commit message contains characters unsupported by '\ 'the current encoding.' class UpgradeReadonly(BzrError): _fmt = "Upgrade URL cannot work with readonly URLs." class UpToDateFormat(BzrError): _fmt = "The branch format %(format)s is already at the most recent format." def __init__(self, format): BzrError.__init__(self) self.format = format class StrictCommitFailed(Exception): _fmt = "Commit refused because there are unknowns in the tree." 
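# --- Illustrative sketch, not part of bzrlib: the LockError hierarchy above
# is intended to be caught as a family.  Callers usually treat LockContention
# (a normal user-level condition, internal_error=False) differently from the
# rest of the hierarchy.  take_write_lock is a hypothetical callable standing
# in for whatever object actually acquires the lock.
def describe_lock_failure(take_write_lock, lock_url):
    try:
        take_write_lock(lock_url)
    except LockContention, err:
        # Expected contention with another bzr process; not a bug.
        return 'Could not lock %s: %s' % (lock_url, err.msg)
    except LockError, err:
        # Anything else in the family (LockBroken, LockCorrupt, ...).
        return 'Unexpected locking problem: %s' % (err,)
    return None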
class NoSuchRevision(InternalBzrError): _fmt = "%(branch)s has no revision %(revision)s" def __init__(self, branch, revision): # 'branch' may sometimes be an internal object like a KnitRevisionStore BzrError.__init__(self, branch=branch, revision=revision) class RangeInChangeOption(BzrError): _fmt = "Option --change does not accept revision ranges" class NoSuchRevisionSpec(BzrError): _fmt = "No namespace registered for string: %(spec)r" def __init__(self, spec): BzrError.__init__(self, spec=spec) class NoSuchRevisionInTree(NoSuchRevision): """When using Tree.revision_tree, and the revision is not accessible.""" _fmt = "The revision id {%(revision_id)s} is not present in the tree %(tree)s." def __init__(self, tree, revision_id): BzrError.__init__(self) self.tree = tree self.revision_id = revision_id class InvalidRevisionSpec(BzrError): _fmt = ("Requested revision: '%(spec)s' does not exist in branch:" " %(branch_url)s%(extra)s") def __init__(self, spec, branch, extra=None): BzrError.__init__(self, branch=branch, spec=spec) self.branch_url = getattr(branch, 'user_url', str(branch)) if extra: self.extra = '\n' + str(extra) else: self.extra = '' class AppendRevisionsOnlyViolation(BzrError): _fmt = ('Operation denied because it would change the main history,' ' which is not permitted by the append_revisions_only setting on' ' branch "%(location)s".') def __init__(self, location): import bzrlib.urlutils as urlutils location = urlutils.unescape_for_display(location, 'ascii') BzrError.__init__(self, location=location) class DivergedBranches(BzrError): _fmt = ("These branches have diverged." " Use the missing command to see how.\n" "Use the merge command to reconcile them.") def __init__(self, branch1, branch2): self.branch1 = branch1 self.branch2 = branch2 class NotLefthandHistory(InternalBzrError): _fmt = "Supplied history does not follow left-hand parents" def __init__(self, history): BzrError.__init__(self, history=history) class UnrelatedBranches(BzrError): _fmt = ("Branches have no common ancestor, and" " no merge base revision was specified.") class CannotReverseCherrypick(BzrError): _fmt = ('Selected merge cannot perform reverse cherrypicks. Try merge3' ' or diff3.') class NoCommonAncestor(BzrError): _fmt = "Revisions have no common ancestor: %(revision_a)s %(revision_b)s" def __init__(self, revision_a, revision_b): self.revision_a = revision_a self.revision_b = revision_b class NoCommonRoot(BzrError): _fmt = ("Revisions are not derived from the same root: " "%(revision_a)s %(revision_b)s.") def __init__(self, revision_a, revision_b): BzrError.__init__(self, revision_a=revision_a, revision_b=revision_b) class NotAncestor(BzrError): _fmt = "Revision %(rev_id)s is not an ancestor of %(not_ancestor_id)s" def __init__(self, rev_id, not_ancestor_id): BzrError.__init__(self, rev_id=rev_id, not_ancestor_id=not_ancestor_id) class NoCommits(BranchError): _fmt = "Branch %(branch)s has no commits." class UnlistableStore(BzrError): def __init__(self, store): BzrError.__init__(self, "Store %s is not listable" % store) class UnlistableBranch(BzrError): def __init__(self, br): BzrError.__init__(self, "Stores for branch %s are not listable" % br) class BoundBranchOutOfDate(BzrError): _fmt = ("Bound branch %(branch)s is out of date with master branch" " %(master)s.%(extra_help)s") def __init__(self, branch, master): BzrError.__init__(self) self.branch = branch self.master = master self.extra_help = '' class CommitToDoubleBoundBranch(BzrError): _fmt = ("Cannot commit to branch %(branch)s." 
" It is bound to %(master)s, which is bound to %(remote)s.") def __init__(self, branch, master, remote): BzrError.__init__(self) self.branch = branch self.master = master self.remote = remote class OverwriteBoundBranch(BzrError): _fmt = "Cannot pull --overwrite to a branch which is bound %(branch)s" def __init__(self, branch): BzrError.__init__(self) self.branch = branch class BoundBranchConnectionFailure(BzrError): _fmt = ("Unable to connect to target of bound branch %(branch)s" " => %(target)s: %(error)s") def __init__(self, branch, target, error): BzrError.__init__(self) self.branch = branch self.target = target self.error = error class WeaveError(BzrError): _fmt = "Error in processing weave: %(msg)s" def __init__(self, msg=None): BzrError.__init__(self) self.msg = msg class WeaveRevisionAlreadyPresent(WeaveError): _fmt = "Revision {%(revision_id)s} already present in %(weave)s" def __init__(self, revision_id, weave): WeaveError.__init__(self) self.revision_id = revision_id self.weave = weave class WeaveRevisionNotPresent(WeaveError): _fmt = "Revision {%(revision_id)s} not present in %(weave)s" def __init__(self, revision_id, weave): WeaveError.__init__(self) self.revision_id = revision_id self.weave = weave class WeaveFormatError(WeaveError): _fmt = "Weave invariant violated: %(what)s" def __init__(self, what): WeaveError.__init__(self) self.what = what class WeaveParentMismatch(WeaveError): _fmt = "Parents are mismatched between two revisions. %(msg)s" class WeaveInvalidChecksum(WeaveError): _fmt = "Text did not match its checksum: %(msg)s" class WeaveTextDiffers(WeaveError): _fmt = ("Weaves differ on text content. Revision:" " {%(revision_id)s}, %(weave_a)s, %(weave_b)s") def __init__(self, revision_id, weave_a, weave_b): WeaveError.__init__(self) self.revision_id = revision_id self.weave_a = weave_a self.weave_b = weave_b class WeaveTextDiffers(WeaveError): _fmt = ("Weaves differ on text content. Revision:" " {%(revision_id)s}, %(weave_a)s, %(weave_b)s") def __init__(self, revision_id, weave_a, weave_b): WeaveError.__init__(self) self.revision_id = revision_id self.weave_a = weave_a self.weave_b = weave_b class VersionedFileError(BzrError): _fmt = "Versioned file error" class RevisionNotPresent(VersionedFileError): _fmt = 'Revision {%(revision_id)s} not present in "%(file_id)s".' def __init__(self, revision_id, file_id): VersionedFileError.__init__(self) self.revision_id = revision_id self.file_id = file_id class RevisionAlreadyPresent(VersionedFileError): _fmt = 'Revision {%(revision_id)s} already present in "%(file_id)s".' def __init__(self, revision_id, file_id): VersionedFileError.__init__(self) self.revision_id = revision_id self.file_id = file_id class VersionedFileInvalidChecksum(VersionedFileError): _fmt = "Text did not match its checksum: %(msg)s" class KnitError(InternalBzrError): _fmt = "Knit error" class KnitCorrupt(KnitError): _fmt = "Knit %(filename)s corrupt: %(how)s" def __init__(self, filename, how): KnitError.__init__(self) self.filename = filename self.how = how class SHA1KnitCorrupt(KnitCorrupt): _fmt = ("Knit %(filename)s corrupt: sha-1 of reconstructed text does not " "match expected sha-1. key %(key)s expected sha %(expected)s actual " "sha %(actual)s") def __init__(self, filename, actual, expected, key, content): KnitError.__init__(self) self.filename = filename self.actual = actual self.expected = expected self.key = key self.content = content class KnitDataStreamIncompatible(KnitError): # Not raised anymore, as we can convert data streams. 
In future we may # need it again for more exotic cases, so we're keeping it around for now. _fmt = "Cannot insert knit data stream of format \"%(stream_format)s\" into knit of format \"%(target_format)s\"." def __init__(self, stream_format, target_format): self.stream_format = stream_format self.target_format = target_format class KnitDataStreamUnknown(KnitError): # Indicates a data stream we don't know how to handle. _fmt = "Cannot parse knit data stream of format \"%(stream_format)s\"." def __init__(self, stream_format): self.stream_format = stream_format class KnitHeaderError(KnitError): _fmt = 'Knit header error: %(badline)r unexpected for file "%(filename)s".' def __init__(self, badline, filename): KnitError.__init__(self) self.badline = badline self.filename = filename class KnitIndexUnknownMethod(KnitError): """Raised when we don't understand the storage method. Currently only 'fulltext' and 'line-delta' are supported. """ _fmt = ("Knit index %(filename)s does not have a known method" " in options: %(options)r") def __init__(self, filename, options): KnitError.__init__(self) self.filename = filename self.options = options class RetryWithNewPacks(BzrError): """Raised when we realize that the packs on disk have changed. This is meant as more of a signaling exception, to trap between where a local error occurred and the code that can actually handle the error and code that can retry appropriately. """ internal_error = True _fmt = ("Pack files have changed, reload and retry. context: %(context)s" " %(orig_error)s") def __init__(self, context, reload_occurred, exc_info): """create a new RetryWithNewPacks error. :param reload_occurred: Set to True if we know that the packs have already been reloaded, and we are failing because of an in-memory cache miss. If set to True then we will ignore if a reload says nothing has changed, because we assume it has already reloaded. If False, then a reload with nothing changed will force an error. :param exc_info: The original exception traceback, so if there is a problem we can raise the original error (value from sys.exc_info()) """ BzrError.__init__(self) self.context = context self.reload_occurred = reload_occurred self.exc_info = exc_info self.orig_error = exc_info[1] # TODO: The global error handler should probably treat this by # raising/printing the original exception with a bit about # RetryWithNewPacks also not being caught class RetryAutopack(RetryWithNewPacks): """Raised when we are autopacking and we find a missing file. Meant as a signaling exception, to tell the autopack code it should try again. """ internal_error = True _fmt = ("Pack files have changed, reload and try autopack again." " context: %(context)s %(orig_error)s") class NoSuchExportFormat(BzrError): _fmt = "Export format %(format)r not supported" def __init__(self, format): BzrError.__init__(self) self.format = format class TransportError(BzrError): _fmt = "Transport error: %(msg)s %(orig_error)s" def __init__(self, msg=None, orig_error=None): if msg is None and orig_error is not None: msg = str(orig_error) if orig_error is None: orig_error = '' if msg is None: msg = '' self.msg = msg self.orig_error = orig_error BzrError.__init__(self) class TooManyConcurrentRequests(InternalBzrError): _fmt = ("The medium '%(medium)s' has reached its concurrent request limit." 
" Be sure to finish_writing and finish_reading on the" " currently open request.") def __init__(self, medium): self.medium = medium class SmartProtocolError(TransportError): _fmt = "Generic bzr smart protocol error: %(details)s" def __init__(self, details): self.details = details class UnexpectedProtocolVersionMarker(TransportError): _fmt = "Received bad protocol version marker: %(marker)r" def __init__(self, marker): self.marker = marker class UnknownSmartMethod(InternalBzrError): _fmt = "The server does not recognise the '%(verb)s' request." def __init__(self, verb): self.verb = verb class SmartMessageHandlerError(InternalBzrError): _fmt = ("The message handler raised an exception:\n" "%(traceback_text)s") def __init__(self, exc_info): import traceback # GZ 2010-08-10: Cycle with exc_tb/exc_info affects at least one test self.exc_type, self.exc_value, self.exc_tb = exc_info self.exc_info = exc_info traceback_strings = traceback.format_exception( self.exc_type, self.exc_value, self.exc_tb) self.traceback_text = ''.join(traceback_strings) # A set of semi-meaningful errors which can be thrown class TransportNotPossible(TransportError): _fmt = "Transport operation not possible: %(msg)s %(orig_error)s" class ConnectionError(TransportError): _fmt = "Connection error: %(msg)s %(orig_error)s" class SocketConnectionError(ConnectionError): _fmt = "%(msg)s %(host)s%(port)s%(orig_error)s" def __init__(self, host, port=None, msg=None, orig_error=None): if msg is None: msg = 'Failed to connect to' if orig_error is None: orig_error = '' else: orig_error = '; ' + str(orig_error) ConnectionError.__init__(self, msg=msg, orig_error=orig_error) self.host = host if port is None: self.port = '' else: self.port = ':%s' % port # XXX: This is also used for unexpected end of file, which is different at the # TCP level from "connection reset". class ConnectionReset(TransportError): _fmt = "Connection closed: %(msg)s %(orig_error)s" class ConnectionTimeout(ConnectionError): _fmt = "Connection Timeout: %(msg)s%(orig_error)s" class InvalidRange(TransportError): _fmt = "Invalid range access in %(path)s at %(offset)s: %(msg)s" def __init__(self, path, offset, msg=None): TransportError.__init__(self, msg) self.path = path self.offset = offset class InvalidHttpResponse(TransportError): _fmt = "Invalid http response for %(path)s: %(msg)s%(orig_error)s" def __init__(self, path, msg, orig_error=None): self.path = path if orig_error is None: orig_error = '' else: # This is reached for obscure and unusual errors so we want to # preserve as much info as possible to ease debug. orig_error = ': %r' % (orig_error,) TransportError.__init__(self, msg, orig_error=orig_error) class InvalidHttpRange(InvalidHttpResponse): _fmt = "Invalid http range %(range)r for %(path)s: %(msg)s" def __init__(self, path, range, msg): self.range = range InvalidHttpResponse.__init__(self, path, msg) class HttpBoundaryMissing(InvalidHttpResponse): """A multipart response ends with no boundary marker. This is a special case caused by buggy proxies, described in . 
""" _fmt = "HTTP MIME Boundary missing for %(path)s: %(msg)s" def __init__(self, path, msg): InvalidHttpResponse.__init__(self, path, msg) class InvalidHttpContentType(InvalidHttpResponse): _fmt = 'Invalid http Content-type "%(ctype)s" for %(path)s: %(msg)s' def __init__(self, path, ctype, msg): self.ctype = ctype InvalidHttpResponse.__init__(self, path, msg) class RedirectRequested(TransportError): _fmt = '%(source)s is%(permanently)s redirected to %(target)s' def __init__(self, source, target, is_permanent=False): self.source = source self.target = target if is_permanent: self.permanently = ' permanently' else: self.permanently = '' TransportError.__init__(self) class TooManyRedirections(TransportError): _fmt = "Too many redirections" class ConflictsInTree(BzrError): _fmt = "Working tree has conflicts." class ConfigContentError(BzrError): _fmt = "Config file %(filename)s is not UTF-8 encoded\n" def __init__(self, filename): BzrError.__init__(self) self.filename = filename class ParseConfigError(BzrError): _fmt = "Error(s) parsing config file %(filename)s:\n%(errors)s" def __init__(self, errors, filename): BzrError.__init__(self) self.filename = filename self.errors = '\n'.join(e.msg for e in errors) class ConfigOptionValueError(BzrError): _fmt = ('Bad value "%(value)s" for option "%(name)s".\n' 'See ``bzr help %(name)s``') def __init__(self, name, value): BzrError.__init__(self, name=name, value=value) class NoEmailInUsername(BzrError): _fmt = "%(username)r does not seem to contain a reasonable email address" def __init__(self, username): BzrError.__init__(self) self.username = username class SigningFailed(BzrError): _fmt = 'Failed to GPG sign data with command "%(command_line)s"' def __init__(self, command_line): BzrError.__init__(self, command_line=command_line) class SignatureVerificationFailed(BzrError): _fmt = 'Failed to verify GPG signature data with error "%(error)s"' def __init__(self, error): BzrError.__init__(self, error=error) class DependencyNotPresent(BzrError): _fmt = 'Unable to import library "%(library)s": %(error)s' def __init__(self, library, error): BzrError.__init__(self, library=library, error=error) class GpgmeNotInstalled(DependencyNotPresent): _fmt = 'python-gpgme is not installed, it is needed to verify signatures' def __init__(self, error): DependencyNotPresent.__init__(self, 'gpgme', error) class WorkingTreeNotRevision(BzrError): _fmt = ("The working tree for %(basedir)s has changed since" " the last commit, but weave merge requires that it be" " unchanged") def __init__(self, tree): BzrError.__init__(self, basedir=tree.basedir) class CantReprocessAndShowBase(BzrError): _fmt = ("Can't reprocess and show base, because reprocessing obscures " "the relationship of conflicting lines to the base") class GraphCycleError(BzrError): _fmt = "Cycle in graph %(graph)r" def __init__(self, graph): BzrError.__init__(self) self.graph = graph class WritingCompleted(InternalBzrError): _fmt = ("The MediumRequest '%(request)s' has already had finish_writing " "called upon it - accept bytes may not be called anymore.") def __init__(self, request): self.request = request class WritingNotComplete(InternalBzrError): _fmt = ("The MediumRequest '%(request)s' has not has finish_writing " "called upon it - until the write phase is complete no " "data may be read.") def __init__(self, request): self.request = request class NotConflicted(BzrError): _fmt = "File %(filename)s is not conflicted." 
def __init__(self, filename): BzrError.__init__(self) self.filename = filename class MediumNotConnected(InternalBzrError): _fmt = """The medium '%(medium)s' is not connected.""" def __init__(self, medium): self.medium = medium class MustUseDecorated(Exception): _fmt = "A decorating function has requested its original command be used." class NoBundleFound(BzrError): _fmt = 'No bundle was found in "%(filename)s".' def __init__(self, filename): BzrError.__init__(self) self.filename = filename class BundleNotSupported(BzrError): _fmt = "Unable to handle bundle version %(version)s: %(msg)s" def __init__(self, version, msg): BzrError.__init__(self) self.version = version self.msg = msg class MissingText(BzrError): _fmt = ("Branch %(base)s is missing revision" " %(text_revision)s of %(file_id)s") def __init__(self, branch, text_revision, file_id): BzrError.__init__(self) self.branch = branch self.base = branch.base self.text_revision = text_revision self.file_id = file_id class DuplicateFileId(BzrError): _fmt = "File id {%(file_id)s} already exists in inventory as %(entry)s" def __init__(self, file_id, entry): BzrError.__init__(self) self.file_id = file_id self.entry = entry class DuplicateKey(BzrError): _fmt = "Key %(key)s is already present in map" class DuplicateHelpPrefix(BzrError): _fmt = "The prefix %(prefix)s is in the help search path twice." def __init__(self, prefix): self.prefix = prefix class MalformedTransform(InternalBzrError): _fmt = "Tree transform is malformed %(conflicts)r" class NoFinalPath(BzrError): _fmt = ("No final name for trans_id %(trans_id)r\n" "file-id: %(file_id)r\n" "root trans-id: %(root_trans_id)r\n") def __init__(self, trans_id, transform): self.trans_id = trans_id self.file_id = transform.final_file_id(trans_id) self.root_trans_id = transform.root class BzrBadParameter(InternalBzrError): _fmt = "Bad parameter: %(param)r" # This exception should never be thrown, but it is a base class for all # parameter-to-function errors. def __init__(self, param): BzrError.__init__(self) self.param = param class BzrBadParameterNotUnicode(BzrBadParameter): _fmt = "Parameter %(param)s is neither unicode nor utf8." class ReusingTransform(BzrError): _fmt = "Attempt to reuse a transform that has already been applied." 
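# --- Illustrative sketch, not part of bzrlib: the retry loop that
# RetryWithNewPacks (defined earlier in this module) is designed to signal.
# The exception carries reload_occurred and the original exc_info so the
# caller can decide whether another attempt is worthwhile.  do_read and
# reload_pack_names are hypothetical callables supplied by the caller.
def read_with_pack_retry(do_read, reload_pack_names, attempts=3):
    for _ in range(attempts):
        try:
            return do_read()
        except RetryWithNewPacks, e:
            if e.reload_occurred or reload_pack_names():
                # Either a reload already happened when the error was raised,
                # or reloading now shows the pack names changed: retry.
                continue
            # Nothing changed on disk, so retrying would fail the same way;
            # re-raise the original error with its traceback.
            raise e.exc_info[0], e.exc_info[1], e.exc_info[2]
    # One final unguarded attempt after the retries are used up.
    return do_read()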
class CantMoveRoot(BzrError): _fmt = "Moving the root directory is not supported at this time" class TransformRenameFailed(BzrError): _fmt = "Failed to rename %(from_path)s to %(to_path)s: %(why)s" def __init__(self, from_path, to_path, why, errno): self.from_path = from_path self.to_path = to_path self.why = why self.errno = errno class BzrMoveFailedError(BzrError): _fmt = ("Could not move %(from_path)s%(operator)s %(to_path)s" "%(_has_extra)s%(extra)s") def __init__(self, from_path='', to_path='', extra=None): from bzrlib.osutils import splitpath BzrError.__init__(self) if extra: self.extra, self._has_extra = extra, ': ' else: self.extra = self._has_extra = '' has_from = len(from_path) > 0 has_to = len(to_path) > 0 if has_from: self.from_path = splitpath(from_path)[-1] else: self.from_path = '' if has_to: self.to_path = splitpath(to_path)[-1] else: self.to_path = '' self.operator = "" if has_from and has_to: self.operator = " =>" elif has_from: self.from_path = "from " + from_path elif has_to: self.operator = "to" else: self.operator = "file" class BzrRenameFailedError(BzrMoveFailedError): _fmt = ("Could not rename %(from_path)s%(operator)s %(to_path)s" "%(_has_extra)s%(extra)s") def __init__(self, from_path, to_path, extra=None): BzrMoveFailedError.__init__(self, from_path, to_path, extra) class BzrBadParameterNotString(BzrBadParameter): _fmt = "Parameter %(param)s is not a string or unicode string." class BzrBadParameterMissing(BzrBadParameter): _fmt = "Parameter %(param)s is required but not present." class BzrBadParameterUnicode(BzrBadParameter): _fmt = ("Parameter %(param)s is unicode but" " only byte-strings are permitted.") class BzrBadParameterContainsNewline(BzrBadParameter): _fmt = "Parameter %(param)s contains a newline." class ParamikoNotPresent(DependencyNotPresent): _fmt = "Unable to import paramiko (required for sftp support): %(error)s" def __init__(self, error): DependencyNotPresent.__init__(self, 'paramiko', error) class PointlessMerge(BzrError): _fmt = "Nothing to merge." class UninitializableFormat(BzrError): _fmt = "Format %(format)s cannot be initialised by this version of bzr." def __init__(self, format): BzrError.__init__(self) self.format = format class BadConversionTarget(BzrError): _fmt = "Cannot convert from format %(from_format)s to format %(format)s." \ " %(problem)s" def __init__(self, problem, format, from_format=None): BzrError.__init__(self) self.problem = problem self.format = format self.from_format = from_format or '(unspecified)' class NoDiffFound(BzrError): _fmt = 'Could not find an appropriate Differ for file "%(path)s"' def __init__(self, path): BzrError.__init__(self, path) class ExecutableMissing(BzrError): _fmt = "%(exe_name)s could not be found on this machine" def __init__(self, exe_name): BzrError.__init__(self, exe_name=exe_name) class NoDiff(BzrError): _fmt = "Diff is not installed on this machine: %(msg)s" def __init__(self, msg): BzrError.__init__(self, msg=msg) class NoDiff3(BzrError): _fmt = "Diff3 is not installed on this machine." class ExistingContent(BzrError): # Added in bzrlib 0.92, used by VersionedFile.add_lines. _fmt = "The content being inserted is already present." class ExistingLimbo(BzrError): _fmt = """This tree contains left-over files from a failed operation. 
Please examine %(limbo_dir)s to see if it contains any files you wish to keep, and delete it when you are done.""" def __init__(self, limbo_dir): BzrError.__init__(self) self.limbo_dir = limbo_dir class ExistingPendingDeletion(BzrError): _fmt = """This tree contains left-over files from a failed operation. Please examine %(pending_deletion)s to see if it contains any files you wish to keep, and delete it when you are done.""" def __init__(self, pending_deletion): BzrError.__init__(self, pending_deletion=pending_deletion) class ImmortalLimbo(BzrError): _fmt = """Unable to delete transform temporary directory %(limbo_dir)s. Please examine %(limbo_dir)s to see if it contains any files you wish to keep, and delete it when you are done.""" def __init__(self, limbo_dir): BzrError.__init__(self) self.limbo_dir = limbo_dir class ImmortalPendingDeletion(BzrError): _fmt = ("Unable to delete transform temporary directory " "%(pending_deletion)s. Please examine %(pending_deletion)s to see if it " "contains any files you wish to keep, and delete it when you are done.") def __init__(self, pending_deletion): BzrError.__init__(self, pending_deletion=pending_deletion) class OutOfDateTree(BzrError): _fmt = "Working tree is out of date, please run 'bzr update'.%(more)s" def __init__(self, tree, more=None): if more is None: more = '' else: more = ' ' + more BzrError.__init__(self) self.tree = tree self.more = more class PublicBranchOutOfDate(BzrError): _fmt = 'Public branch "%(public_location)s" lacks revision '\ '"%(revstring)s".' def __init__(self, public_location, revstring): import bzrlib.urlutils as urlutils public_location = urlutils.unescape_for_display(public_location, 'ascii') BzrError.__init__(self, public_location=public_location, revstring=revstring) class MergeModifiedFormatError(BzrError): _fmt = "Error in merge modified format" class ConflictFormatError(BzrError): _fmt = "Format error in conflict listings" class CorruptDirstate(BzrError): _fmt = ("Inconsistency in dirstate file %(dirstate_path)s.\n" "Error: %(description)s") def __init__(self, dirstate_path, description): BzrError.__init__(self) self.dirstate_path = dirstate_path self.description = description class CorruptRepository(BzrError): _fmt = ("An error has been detected in the repository %(repo_path)s.\n" "Please run bzr reconcile on this repository.") def __init__(self, repo): BzrError.__init__(self) self.repo_path = repo.user_url class InconsistentDelta(BzrError): """Used when we get a delta that is not valid.""" _fmt = ("An inconsistent delta was supplied involving %(path)r," " %(file_id)r\nreason: %(reason)s") def __init__(self, path, file_id, reason): BzrError.__init__(self) self.path = path self.file_id = file_id self.reason = reason class InconsistentDeltaDelta(InconsistentDelta): """Used when we get a delta that is not valid.""" _fmt = ("An inconsistent delta was supplied: %(delta)r" "\nreason: %(reason)s") def __init__(self, delta, reason): BzrError.__init__(self) self.delta = delta self.reason = reason class UpgradeRequired(BzrError): _fmt = "To use this feature you must upgrade your branch at %(path)s." def __init__(self, path): BzrError.__init__(self) self.path = path class RepositoryUpgradeRequired(UpgradeRequired): _fmt = "To use this feature you must upgrade your repository at %(path)s." 
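# --- Illustrative sketch, not part of bzrlib's real top-level handler: how
# the internal_error flag carried by the classes in this module is typically
# consumed.  User-level errors get a one-line message; internal errors get a
# traceback because they indicate a bug.  Assumes it is called from inside
# the except block that caught the error.
import sys
import traceback

def report_bzr_error(err):
    if getattr(err, 'internal_error', False):
        # Probably a bug in bzr itself: show where it came from.
        traceback.print_exc(file=sys.stderr)
        return EXIT_INTERNAL_ERROR
    # Environment or user error: the formatted message is enough.
    sys.stderr.write('bzr: ERROR: %s\n' % (err,))
    return EXIT_ERROR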
class RichRootUpgradeRequired(UpgradeRequired): _fmt = ("To use this feature you must upgrade your branch at %(path)s to" " a format which supports rich roots.") class LocalRequiresBoundBranch(BzrError): _fmt = "Cannot perform local-only commits on unbound branches." class UnsupportedOperation(BzrError): _fmt = ("The method %(mname)s is not supported on" " objects of type %(tname)s.") def __init__(self, method, method_self): self.method = method self.mname = method.__name__ self.tname = type(method_self).__name__ class CannotSetRevisionId(UnsupportedOperation): """Raised when a commit is attempting to set a revision id but cant.""" class NonAsciiRevisionId(UnsupportedOperation): """Raised when a commit is attempting to set a non-ascii revision id but cant. """ class GhostTagsNotSupported(BzrError): _fmt = "Ghost tags not supported by format %(format)r." def __init__(self, format): self.format = format class BinaryFile(BzrError): _fmt = "File is binary but should be text." class IllegalPath(BzrError): _fmt = "The path %(path)s is not permitted on this platform" def __init__(self, path): BzrError.__init__(self) self.path = path class TestamentMismatch(BzrError): _fmt = """Testament did not match expected value. For revision_id {%(revision_id)s}, expected {%(expected)s}, measured {%(measured)s}""" def __init__(self, revision_id, expected, measured): self.revision_id = revision_id self.expected = expected self.measured = measured class NotABundle(BzrError): _fmt = "Not a bzr revision-bundle: %(text)r" def __init__(self, text): BzrError.__init__(self) self.text = text class BadBundle(BzrError): _fmt = "Bad bzr revision-bundle: %(text)r" def __init__(self, text): BzrError.__init__(self) self.text = text class MalformedHeader(BadBundle): _fmt = "Malformed bzr revision-bundle header: %(text)r" class MalformedPatches(BadBundle): _fmt = "Malformed patches in bzr revision-bundle: %(text)r" class MalformedFooter(BadBundle): _fmt = "Malformed footer in bzr revision-bundle: %(text)r" class UnsupportedEOLMarker(BadBundle): _fmt = "End of line marker was not \\n in bzr revision-bundle" def __init__(self): # XXX: BadBundle's constructor assumes there's explanatory text, # but for this there is not BzrError.__init__(self) class IncompatibleBundleFormat(BzrError): _fmt = "Bundle format %(bundle_format)s is incompatible with %(other)s" def __init__(self, bundle_format, other): BzrError.__init__(self) self.bundle_format = bundle_format self.other = other class BadInventoryFormat(BzrError): _fmt = "Root class for inventory serialization errors" class UnexpectedInventoryFormat(BadInventoryFormat): _fmt = "The inventory was not in the expected format:\n %(msg)s" def __init__(self, msg): BadInventoryFormat.__init__(self, msg=msg) class RootNotRich(BzrError): _fmt = """This operation requires rich root data storage""" class NoSmartMedium(InternalBzrError): _fmt = "The transport '%(transport)s' cannot tunnel the smart protocol." def __init__(self, transport): self.transport = transport class UnknownSSH(BzrError): _fmt = "Unrecognised value for BZR_SSH environment variable: %(vendor)s" def __init__(self, vendor): BzrError.__init__(self) self.vendor = vendor class SSHVendorNotFound(BzrError): _fmt = ("Don't know how to handle SSH connections." 
" Please set BZR_SSH environment variable.") class GhostRevisionsHaveNoRevno(BzrError): """When searching for revnos, if we encounter a ghost, we are stuck""" _fmt = ("Could not determine revno for {%(revision_id)s} because" " its ancestry shows a ghost at {%(ghost_revision_id)s}") def __init__(self, revision_id, ghost_revision_id): self.revision_id = revision_id self.ghost_revision_id = ghost_revision_id class GhostRevisionUnusableHere(BzrError): _fmt = "Ghost revision {%(revision_id)s} cannot be used here." def __init__(self, revision_id): BzrError.__init__(self) self.revision_id = revision_id class IllegalUseOfScopeReplacer(InternalBzrError): _fmt = ("ScopeReplacer object %(name)r was used incorrectly:" " %(msg)s%(extra)s") def __init__(self, name, msg, extra=None): BzrError.__init__(self) self.name = name self.msg = msg if extra: self.extra = ': ' + str(extra) else: self.extra = '' class InvalidImportLine(InternalBzrError): _fmt = "Not a valid import statement: %(msg)\n%(text)s" def __init__(self, text, msg): BzrError.__init__(self) self.text = text self.msg = msg class ImportNameCollision(InternalBzrError): _fmt = ("Tried to import an object to the same name as" " an existing object. %(name)s") def __init__(self, name): BzrError.__init__(self) self.name = name class NotAMergeDirective(BzrError): """File starting with %(firstline)r is not a merge directive""" def __init__(self, firstline): BzrError.__init__(self, firstline=firstline) class NoMergeSource(BzrError): """Raise if no merge source was specified for a merge directive""" _fmt = "A merge directive must provide either a bundle or a public"\ " branch location." class IllegalMergeDirectivePayload(BzrError): """A merge directive contained something other than a patch or bundle""" _fmt = "Bad merge directive payload %(start)r" def __init__(self, start): BzrError(self) self.start = start class PatchVerificationFailed(BzrError): """A patch from a merge directive could not be verified""" _fmt = "Preview patch does not match requested changes." class PatchMissing(BzrError): """Raise a patch type was specified but no patch supplied""" _fmt = "Patch_type was %(patch_type)s, but no patch was supplied." def __init__(self, patch_type): BzrError.__init__(self) self.patch_type = patch_type class TargetNotBranch(BzrError): """A merge directive's target branch is required, but isn't a branch""" _fmt = ("Your branch does not have all of the revisions required in " "order to merge this merge directive and the target " "location specified in the merge directive is not a branch: " "%(location)s.") def __init__(self, location): BzrError.__init__(self) self.location = location class UnsupportedInventoryKind(BzrError): _fmt = """Unsupported entry kind %(kind)s""" def __init__(self, kind): self.kind = kind class BadSubsumeSource(BzrError): _fmt = "Can't subsume %(other_tree)s into %(tree)s. %(reason)s" def __init__(self, tree, other_tree, reason): self.tree = tree self.other_tree = other_tree self.reason = reason class SubsumeTargetNeedsUpgrade(BzrError): _fmt = """Subsume target %(other_tree)s needs to be upgraded.""" def __init__(self, other_tree): self.other_tree = other_tree class BadReferenceTarget(InternalBzrError): _fmt = "Can't add reference to %(other_tree)s into %(tree)s." 
\ "%(reason)s" def __init__(self, tree, other_tree, reason): self.tree = tree self.other_tree = other_tree self.reason = reason class NoSuchTag(BzrError): _fmt = "No such tag: %(tag_name)s" def __init__(self, tag_name): self.tag_name = tag_name class TagsNotSupported(BzrError): _fmt = ("Tags not supported by %(branch)s;" " you may be able to use bzr upgrade.") def __init__(self, branch): self.branch = branch class TagAlreadyExists(BzrError): _fmt = "Tag %(tag_name)s already exists." def __init__(self, tag_name): self.tag_name = tag_name class MalformedBugIdentifier(BzrError): _fmt = ('Did not understand bug identifier %(bug_id)s: %(reason)s. ' 'See "bzr help bugs" for more information on this feature.') def __init__(self, bug_id, reason): self.bug_id = bug_id self.reason = reason class InvalidBugTrackerURL(BzrError): _fmt = ("The URL for bug tracker \"%(abbreviation)s\" doesn't " "contain {id}: %(url)s") def __init__(self, abbreviation, url): self.abbreviation = abbreviation self.url = url class UnknownBugTrackerAbbreviation(BzrError): _fmt = ("Cannot find registered bug tracker called %(abbreviation)s " "on %(branch)s") def __init__(self, abbreviation, branch): self.abbreviation = abbreviation self.branch = branch class InvalidLineInBugsProperty(BzrError): _fmt = ("Invalid line in bugs property: '%(line)s'") def __init__(self, line): self.line = line class InvalidBugStatus(BzrError): _fmt = ("Invalid bug status: '%(status)s'") def __init__(self, status): self.status = status class UnexpectedSmartServerResponse(BzrError): _fmt = "Could not understand response from smart server: %(response_tuple)r" def __init__(self, response_tuple): self.response_tuple = response_tuple class ErrorFromSmartServer(BzrError): """An error was received from a smart server. :seealso: UnknownErrorFromSmartServer """ _fmt = "Error received from smart server: %(error_tuple)r" internal_error = True def __init__(self, error_tuple): self.error_tuple = error_tuple try: self.error_verb = error_tuple[0] except IndexError: self.error_verb = None self.error_args = error_tuple[1:] class UnknownErrorFromSmartServer(BzrError): """An ErrorFromSmartServer could not be translated into a typical bzrlib error. This is distinct from ErrorFromSmartServer so that it is possible to distinguish between the following two cases: - ErrorFromSmartServer was uncaught. This is logic error in the client and so should provoke a traceback to the user. - ErrorFromSmartServer was caught but its error_tuple could not be translated. This is probably because the server sent us garbage, and should not provoke a traceback. """ _fmt = "Server sent an unexpected error: %(error_tuple)r" internal_error = False def __init__(self, error_from_smart_server): """Constructor. :param error_from_smart_server: An ErrorFromSmartServer instance. 
""" self.error_from_smart_server = error_from_smart_server self.error_tuple = error_from_smart_server.error_tuple class ContainerError(BzrError): """Base class of container errors.""" class UnknownContainerFormatError(ContainerError): _fmt = "Unrecognised container format: %(container_format)r" def __init__(self, container_format): self.container_format = container_format class UnexpectedEndOfContainerError(ContainerError): _fmt = "Unexpected end of container stream" class UnknownRecordTypeError(ContainerError): _fmt = "Unknown record type: %(record_type)r" def __init__(self, record_type): self.record_type = record_type class InvalidRecordError(ContainerError): _fmt = "Invalid record: %(reason)s" def __init__(self, reason): self.reason = reason class ContainerHasExcessDataError(ContainerError): _fmt = "Container has data after end marker: %(excess)r" def __init__(self, excess): self.excess = excess class DuplicateRecordNameError(ContainerError): _fmt = "Container has multiple records with the same name: %(name)s" def __init__(self, name): self.name = name.decode("utf-8") class NoDestinationAddress(InternalBzrError): _fmt = "Message does not have a destination address." class RepositoryDataStreamError(BzrError): _fmt = "Corrupt or incompatible data stream: %(reason)s" def __init__(self, reason): self.reason = reason class SMTPError(BzrError): _fmt = "SMTP error: %(error)s" def __init__(self, error): self.error = error class NoMessageSupplied(BzrError): _fmt = "No message supplied." class NoMailAddressSpecified(BzrError): _fmt = "No mail-to address (--mail-to) or output (-o) specified." class MailClientNotFound(BzrError): _fmt = "Unable to find mail client with the following names:"\ " %(mail_command_list_string)s" def __init__(self, mail_command_list): mail_command_list_string = ', '.join(mail_command_list) BzrError.__init__(self, mail_command_list=mail_command_list, mail_command_list_string=mail_command_list_string) class SMTPConnectionRefused(SMTPError): _fmt = "SMTP connection to %(host)s refused" def __init__(self, error, host): self.error = error self.host = host class DefaultSMTPConnectionRefused(SMTPConnectionRefused): _fmt = "Please specify smtp_server. No server at default %(host)s." class BzrDirError(BzrError): def __init__(self, bzrdir): import bzrlib.urlutils as urlutils display_url = urlutils.unescape_for_display(bzrdir.user_url, 'ascii') BzrError.__init__(self, bzrdir=bzrdir, display_url=display_url) class UnsyncedBranches(BzrDirError): _fmt = ("'%(display_url)s' is not in sync with %(target_url)s. See" " bzr help sync-for-reconfigure.") def __init__(self, bzrdir, target_branch): BzrDirError.__init__(self, bzrdir) import bzrlib.urlutils as urlutils self.target_url = urlutils.unescape_for_display(target_branch.base, 'ascii') class AlreadyBranch(BzrDirError): _fmt = "'%(display_url)s' is already a branch." class AlreadyTree(BzrDirError): _fmt = "'%(display_url)s' is already a tree." class AlreadyCheckout(BzrDirError): _fmt = "'%(display_url)s' is already a checkout." class AlreadyLightweightCheckout(BzrDirError): _fmt = "'%(display_url)s' is already a lightweight checkout." class AlreadyUsingShared(BzrDirError): _fmt = "'%(display_url)s' is already using a shared repository." class AlreadyStandalone(BzrDirError): _fmt = "'%(display_url)s' is already standalone." 
class AlreadyWithTrees(BzrDirError): _fmt = ("Shared repository '%(display_url)s' already creates " "working trees.") class AlreadyWithNoTrees(BzrDirError): _fmt = ("Shared repository '%(display_url)s' already doesn't create " "working trees.") class ReconfigurationNotSupported(BzrDirError): _fmt = "Requested reconfiguration of '%(display_url)s' is not supported." class NoBindLocation(BzrDirError): _fmt = "No location could be found to bind to at %(display_url)s." class UncommittedChanges(BzrError): _fmt = ('Working tree "%(display_url)s" has uncommitted changes' ' (See bzr status).%(more)s') def __init__(self, tree, more=None): if more is None: more = '' else: more = ' ' + more import bzrlib.urlutils as urlutils user_url = getattr(tree, "user_url", None) if user_url is None: display_url = str(tree) else: display_url = urlutils.unescape_for_display(user_url, 'ascii') BzrError.__init__(self, tree=tree, display_url=display_url, more=more) class StoringUncommittedNotSupported(BzrError): _fmt = ('Branch "%(display_url)s" does not support storing uncommitted' ' changes.') def __init__(self, branch): import bzrlib.urlutils as urlutils user_url = getattr(branch, "user_url", None) if user_url is None: display_url = str(branch) else: display_url = urlutils.unescape_for_display(user_url, 'ascii') BzrError.__init__(self, branch=branch, display_url=display_url) class ShelvedChanges(UncommittedChanges): _fmt = ('Working tree "%(display_url)s" has shelved changes' ' (See bzr shelve --list).%(more)s') class MissingTemplateVariable(BzrError): _fmt = 'Variable {%(name)s} is not available.' def __init__(self, name): self.name = name class NoTemplate(BzrError): _fmt = 'No template specified.' class UnableCreateSymlink(BzrError): _fmt = 'Unable to create symlink %(path_str)son this platform' def __init__(self, path=None): path_str = '' if path: try: path_str = repr(str(path)) except UnicodeEncodeError: path_str = repr(path) path_str += ' ' self.path_str = path_str class UnsupportedTimezoneFormat(BzrError): _fmt = ('Unsupported timezone format "%(timezone)s", ' 'options are "utc", "original", "local".') def __init__(self, timezone): self.timezone = timezone class CommandAvailableInPlugin(StandardError): internal_error = False def __init__(self, cmd_name, plugin_metadata, provider): self.plugin_metadata = plugin_metadata self.cmd_name = cmd_name self.provider = provider def __str__(self): _fmt = ('"%s" is not a standard bzr command. 
\n' 'However, the following official plugin provides this command: %s\n' 'You can install it by going to: %s' % (self.cmd_name, self.plugin_metadata['name'], self.plugin_metadata['url'])) return _fmt class NoPluginAvailable(BzrError): pass class UnableEncodePath(BzrError): _fmt = ('Unable to encode %(kind)s path %(path)r in ' 'user encoding %(user_encoding)s') def __init__(self, path, kind): from bzrlib.osutils import get_user_encoding self.path = path self.kind = kind self.user_encoding = get_user_encoding() class NoSuchConfig(BzrError): _fmt = ('The "%(config_id)s" configuration does not exist.') def __init__(self, config_id): BzrError.__init__(self, config_id=config_id) class NoSuchConfigOption(BzrError): _fmt = ('The "%(option_name)s" configuration option does not exist.') def __init__(self, option_name): BzrError.__init__(self, option_name=option_name) class NoSuchAlias(BzrError): _fmt = ('The alias "%(alias_name)s" does not exist.') def __init__(self, alias_name): BzrError.__init__(self, alias_name=alias_name) class DirectoryLookupFailure(BzrError): """Base type for lookup errors.""" pass class InvalidLocationAlias(DirectoryLookupFailure): _fmt = '"%(alias_name)s" is not a valid location alias.' def __init__(self, alias_name): DirectoryLookupFailure.__init__(self, alias_name=alias_name) class UnsetLocationAlias(DirectoryLookupFailure): _fmt = 'No %(alias_name)s location assigned.' def __init__(self, alias_name): DirectoryLookupFailure.__init__(self, alias_name=alias_name[1:]) class CannotBindAddress(BzrError): _fmt = 'Cannot bind address "%(host)s:%(port)i": %(orig_error)s.' def __init__(self, host, port, orig_error): # nb: in python2.4 socket.error doesn't have a useful repr BzrError.__init__(self, host=host, port=port, orig_error=repr(orig_error.args)) class UnknownRules(BzrError): _fmt = ('Unknown rules detected: %(unknowns_str)s.') def __init__(self, unknowns): BzrError.__init__(self, unknowns_str=", ".join(unknowns)) class TipChangeRejected(BzrError): """A pre_change_branch_tip hook function may raise this to cleanly and explicitly abort a change to a branch tip. """ _fmt = u"Tip change rejected: %(msg)s" def __init__(self, msg): self.msg = msg class ShelfCorrupt(BzrError): _fmt = "Shelf corrupt." class DecompressCorruption(BzrError): _fmt = "Corruption while decompressing repository file%(orig_error)s" def __init__(self, orig_error=None): if orig_error is not None: self.orig_error = ", %s" % (orig_error,) else: self.orig_error = "" BzrError.__init__(self) class NoSuchShelfId(BzrError): _fmt = 'No changes are shelved with id "%(shelf_id)d".' def __init__(self, shelf_id): BzrError.__init__(self, shelf_id=shelf_id) class InvalidShelfId(BzrError): _fmt = '"%(invalid_id)s" is not a valid shelf id, try a number instead.' def __init__(self, invalid_id): BzrError.__init__(self, invalid_id=invalid_id) class JailBreak(BzrError): _fmt = "An attempt to access a url outside the server jail was made: '%(url)s'." def __init__(self, url): BzrError.__init__(self, url=url) class UserAbort(BzrError): _fmt = 'The user aborted the operation.' class MustHaveWorkingTree(BzrError): _fmt = ("Branching '%(url)s'(%(format)s) must create a working tree.") def __init__(self, format, url): BzrError.__init__(self, format=format, url=url) class NoSuchView(BzrError): """A view does not exist. """ _fmt = u"No such view: %(view_name)s." def __init__(self, view_name): self.view_name = view_name class ViewsNotSupported(BzrError): """Views are not supported by a tree format. 
""" _fmt = ("Views are not supported by %(tree)s;" " use 'bzr upgrade' to change your tree to a later format.") def __init__(self, tree): self.tree = tree class FileOutsideView(BzrError): _fmt = ('Specified file "%(file_name)s" is outside the current view: ' '%(view_str)s') def __init__(self, file_name, view_files): self.file_name = file_name self.view_str = ", ".join(view_files) class UnresumableWriteGroup(BzrError): _fmt = ("Repository %(repository)s cannot resume write group " "%(write_groups)r: %(reason)s") internal_error = True def __init__(self, repository, write_groups, reason): self.repository = repository self.write_groups = write_groups self.reason = reason class UnsuspendableWriteGroup(BzrError): _fmt = ("Repository %(repository)s cannot suspend a write group.") internal_error = True def __init__(self, repository): self.repository = repository class LossyPushToSameVCS(BzrError): _fmt = ("Lossy push not possible between %(source_branch)r and " "%(target_branch)r that are in the same VCS.") internal_error = True def __init__(self, source_branch, target_branch): self.source_branch = source_branch self.target_branch = target_branch class NoRoundtrippingSupport(BzrError): _fmt = ("Roundtripping is not supported between %(source_branch)r and " "%(target_branch)r.") internal_error = True def __init__(self, source_branch, target_branch): self.source_branch = source_branch self.target_branch = target_branch class FileTimestampUnavailable(BzrError): _fmt = "The filestamp for %(path)s is not available." internal_error = True def __init__(self, path): self.path = path class NoColocatedBranchSupport(BzrError): _fmt = ("%(bzrdir)r does not support co-located branches.") def __init__(self, bzrdir): self.bzrdir = bzrdir class NoWhoami(BzrError): _fmt = ('Unable to determine your name.\n' "Please, set your name with the 'whoami' command.\n" 'E.g. bzr whoami "Your Name "') class InvalidPattern(BzrError): _fmt = ('Invalid pattern(s) found. %(msg)s') def __init__(self, msg): self.msg = msg class RecursiveBind(BzrError): _fmt = ('Branch "%(branch_url)s" appears to be bound to itself. ' 'Please use `bzr unbind` to fix.') def __init__(self, branch_url): self.branch_url = branch_url # FIXME: I would prefer to define the config related exception classes in # config.py but the lazy import mechanism proscribes this -- vila 20101222 class OptionExpansionLoop(BzrError): _fmt = 'Loop involving %(refs)r while expanding "%(string)s".' def __init__(self, string, refs): self.string = string self.refs = '->'.join(refs) class ExpandingUnknownOption(BzrError): _fmt = 'Option "%(name)s" is not defined while expanding "%(string)s".' def __init__(self, name, string): self.name = name self.string = string class IllegalOptionName(BzrError): _fmt = 'Option "%(name)s" is not allowed.' def __init__(self, name): self.name = name class NoCompatibleInter(BzrError): _fmt = ('No compatible object available for operations from %(source)r ' 'to %(target)r.') def __init__(self, source, target): self.source = source self.target = target class HpssVfsRequestNotAllowed(BzrError): _fmt = ("VFS requests over the smart server are not allowed. 
Encountered: " "%(method)s, %(arguments)s.") def __init__(self, method, arguments): self.method = method self.arguments = arguments class UnsupportedKindChange(BzrError): _fmt = ("Kind change from %(from_kind)s to %(to_kind)s for " "%(path)s not supported by format %(format)r") def __init__(self, path, from_kind, to_kind, format): self.path = path self.from_kind = from_kind self.to_kind = to_kind self.format = format class MissingFeature(BzrError): _fmt = ("Missing feature %(feature)s not provided by this " "version of Bazaar or any plugin.") def __init__(self, feature): self.feature = feature class PatchSyntax(BzrError): """Base class for patch syntax errors.""" class BinaryFiles(BzrError): _fmt = 'Binary files section encountered.' def __init__(self, orig_name, mod_name): self.orig_name = orig_name self.mod_name = mod_name class MalformedPatchHeader(PatchSyntax): _fmt = "Malformed patch header. %(desc)s\n%(line)r" def __init__(self, desc, line): self.desc = desc self.line = line class MalformedHunkHeader(PatchSyntax): _fmt = "Malformed hunk header. %(desc)s\n%(line)r" def __init__(self, desc, line): self.desc = desc self.line = line class MalformedLine(PatchSyntax): _fmt = "Malformed line. %(desc)s\n%(line)r" def __init__(self, desc, line): self.desc = desc self.line = line class PatchConflict(BzrError): _fmt = ('Text contents mismatch at line %(line_no)d. Original has ' '"%(orig_line)s", but patch says it should be "%(patch_line)s"') def __init__(self, line_no, orig_line, patch_line): self.line_no = line_no self.orig_line = orig_line.rstrip('\n') self.patch_line = patch_line.rstrip('\n') class FeatureAlreadyRegistered(BzrError): _fmt = 'The feature %(feature)s has already been registered.' def __init__(self, feature): self.feature = feature class ChangesAlreadyStored(BzrCommandError): _fmt = ('Cannot store uncommitted changes because this branch already' ' stores uncommitted changes.') bzr-2.7.0/bzrlib/estimate_compressed_size.py0000644000000000000000000000555311673635356017421 0ustar 00000000000000# Copyright (C) 2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Code to estimate the entropy of content""" from __future__ import absolute_import import zlib class ZLibEstimator(object): """Uses zlib.compressobj to estimate compressed size.""" def __init__(self, target_size, min_compression=2.0): """Create a new estimator. :param target_size: The desired size of the compressed content. :param min_compression: Estimated minimum compression. By default we assume that the content is 'text', which means a min compression of about 2:1. 
""" self._target_size = target_size self._compressor = zlib.compressobj() self._uncompressed_size_added = 0 self._compressed_size_added = 0 self._unflushed_size_added = 0 self._estimated_compression = 2.0 def add_content(self, content): self._uncompressed_size_added += len(content) self._unflushed_size_added += len(content) z_size = len(self._compressor.compress(content)) if z_size > 0: self._record_z_len(z_size) def _record_z_len(self, count): # We got some compressed bytes, update the counters self._compressed_size_added += count self._unflushed_size_added = 0 # So far we've read X uncompressed bytes, and written Y compressed # bytes. We should have a decent estimate of the final compression. self._estimated_compression = (float(self._uncompressed_size_added) / self._compressed_size_added) def full(self): """Have we reached the target size?""" if self._unflushed_size_added: remaining_size = self._target_size - self._compressed_size_added # Estimate how much compressed content the unflushed data will # consume est_z_size = (self._unflushed_size_added / self._estimated_compression) if est_z_size >= remaining_size: # We estimate we are close to remaining z_size = len(self._compressor.flush(zlib.Z_SYNC_FLUSH)) self._record_z_len(z_size) return self._compressed_size_added >= self._target_size bzr-2.7.0/bzrlib/export/0000755000000000000000000000000010336222273013250 5ustar 00000000000000bzr-2.7.0/bzrlib/export_pot.py0000644000000000000000000002636611673635356014540 0ustar 00000000000000# Copyright (C) 2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # The normalize function is taken from pygettext which is distributed # with Python under the Python License, which is GPL compatible. """Extract docstrings from Bazaar commands. This module only handles bzrlib objects that use strings not directly wrapped by a gettext() call. To generate a complete translation template file, this output needs to be combined with that of xgettext or a similar command for extracting those strings, as is done in the bzr Makefile. Sorting the output is also left to that stage of the process. """ from __future__ import absolute_import import inspect import os from bzrlib import ( commands as _mod_commands, errors, help_topics, option, plugin, help, ) from bzrlib.trace import ( mutter, note, ) from bzrlib.i18n import gettext def _escape(s): s = (s.replace('\\', '\\\\') .replace('\n', '\\n') .replace('\r', '\\r') .replace('\t', '\\t') .replace('"', '\\"') ) return s def _normalize(s): # This converts the various Python string types into a format that # is appropriate for .po files, namely much closer to C style. 
lines = s.split('\n') if len(lines) == 1: s = '"' + _escape(s) + '"' else: if not lines[-1]: del lines[-1] lines[-1] = lines[-1] + '\n' lines = map(_escape, lines) lineterm = '\\n"\n"' s = '""\n"' + lineterm.join(lines) + '"' return s def _parse_source(source_text): """Get object to lineno mappings from given source_text""" import ast cls_to_lineno = {} str_to_lineno = {} for node in ast.walk(ast.parse(source_text)): # TODO: worry about duplicates? if isinstance(node, ast.ClassDef): # TODO: worry about nesting? cls_to_lineno[node.name] = node.lineno elif isinstance(node, ast.Str): # Python AST gives location of string literal as the line the # string terminates on. It's more useful to have the line the # string begins on. Unfortunately, counting back newlines is # only an approximation as the AST is ignorant of escaping. str_to_lineno[node.s] = node.lineno - node.s.count('\n') return cls_to_lineno, str_to_lineno class _ModuleContext(object): """Record of the location within a source tree""" def __init__(self, path, lineno=1, _source_info=None): self.path = path self.lineno = lineno if _source_info is not None: self._cls_to_lineno, self._str_to_lineno = _source_info @classmethod def from_module(cls, module): """Get new context from module object and parse source for linenos""" sourcepath = inspect.getsourcefile(module) # TODO: fix this to do the right thing rather than rely on cwd relpath = os.path.relpath(sourcepath) return cls(relpath, _source_info=_parse_source("".join(inspect.findsource(module)[0]))) def from_class(self, cls): """Get new context with same details but lineno of class in source""" try: lineno = self._cls_to_lineno[cls.__name__] except (AttributeError, KeyError): mutter("Definition of %r not found in %r", cls, self.path) return self return self.__class__(self.path, lineno, (self._cls_to_lineno, self._str_to_lineno)) def from_string(self, string): """Get new context with same details but lineno of string in source""" try: lineno = self._str_to_lineno[string] except (AttributeError, KeyError): mutter("String %r not found in %r", string[:20], self.path) return self return self.__class__(self.path, lineno, (self._cls_to_lineno, self._str_to_lineno)) class _PotExporter(object): """Write message details to output stream in .pot file format""" def __init__(self, outf, include_duplicates=False): self.outf = outf if include_duplicates: self._msgids = None else: self._msgids = set() self._module_contexts = {} def poentry(self, path, lineno, s, comment=None): if self._msgids is not None: if s in self._msgids: return self._msgids.add(s) if comment is None: comment = '' else: comment = "# %s\n" % comment mutter("Exporting msg %r at line %d in %r", s[:20], lineno, path) self.outf.write( "#: {path}:{lineno}\n" "{comment}" "msgid {msg}\n" "msgstr \"\"\n" "\n".format( path=path, lineno=lineno, comment=comment, msg=_normalize(s))) def poentry_in_context(self, context, string, comment=None): context = context.from_string(string) self.poentry(context.path, context.lineno, string, comment) def poentry_per_paragraph(self, path, lineno, msgid, include=None): # TODO: How to split long help? 
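        # Split the message on blank lines and emit one POT entry per
        # paragraph, advancing the reported source line number past each
        # paragraph (its own lines plus the blank separator line).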
paragraphs = msgid.split('\n\n') if include is not None: paragraphs = filter(include, paragraphs) for p in paragraphs: self.poentry(path, lineno, p) lineno += p.count('\n') + 2 def get_context(self, obj): module = inspect.getmodule(obj) try: context = self._module_contexts[module.__name__] except KeyError: context = _ModuleContext.from_module(module) self._module_contexts[module.__name__] = context if inspect.isclass(obj): context = context.from_class(obj) return context def _write_option(exporter, context, opt, note): if getattr(opt, 'hidden', False): return optname = opt.name if getattr(opt, 'title', None): exporter.poentry_in_context(context, opt.title, "title of {name!r} {what}".format(name=optname, what=note)) for name, _, _, helptxt in opt.iter_switches(): if name != optname: if opt.is_hidden(name): continue name = "=".join([optname, name]) if helptxt: exporter.poentry_in_context(context, helptxt, "help of {name!r} {what}".format(name=name, what=note)) def _standard_options(exporter): OPTIONS = option.Option.OPTIONS context = exporter.get_context(option) for name in sorted(OPTIONS.keys()): opt = OPTIONS[name] _write_option(exporter, context.from_string(name), opt, "option") def _command_options(exporter, context, cmd): note = "option of {0!r} command".format(cmd.name()) for opt in cmd.takes_options: # String values in Command option lists are for global options if not isinstance(opt, str): _write_option(exporter, context, opt, note) def _write_command_help(exporter, cmd): context = exporter.get_context(cmd.__class__) rawdoc = cmd.__doc__ dcontext = context.from_string(rawdoc) doc = inspect.cleandoc(rawdoc) def exclude_usage(p): # ':Usage:' has special meaning in help topics. # This is usage example of command and should not be translated. if p.splitlines()[0] != ':Usage:': return True exporter.poentry_per_paragraph(dcontext.path, dcontext.lineno, doc, exclude_usage) _command_options(exporter, context, cmd) def _command_helps(exporter, plugin_name=None): """Extract docstrings from path. This respects the Bazaar cmdtable/table convention and will only extract docstrings from functions mentioned in these tables. """ from glob import glob # builtin commands for cmd_name in _mod_commands.builtin_command_names(): command = _mod_commands.get_cmd_object(cmd_name, False) if command.hidden: continue if plugin_name is not None: # only export builtins if we are not exporting plugin commands continue note(gettext("Exporting messages from builtin command: %s"), cmd_name) _write_command_help(exporter, command) plugin_path = plugin.get_core_plugin_path() core_plugins = glob(plugin_path + '/*/__init__.py') core_plugins = [os.path.basename(os.path.dirname(p)) for p in core_plugins] # plugins for cmd_name in _mod_commands.plugin_command_names(): command = _mod_commands.get_cmd_object(cmd_name, False) if command.hidden: continue if plugin_name is not None and command.plugin_name() != plugin_name: # if we are exporting plugin commands, skip plugins we have not specified. continue if plugin_name is None and command.plugin_name() not in core_plugins: # skip non-core plugins # TODO: Support extracting from third party plugins. 
continue note(gettext("Exporting messages from plugin command: {0} in {1}").format( cmd_name, command.plugin_name() )) _write_command_help(exporter, command) def _error_messages(exporter): """Extract fmt string from bzrlib.errors.""" context = exporter.get_context(errors) base_klass = errors.BzrError for name in dir(errors): klass = getattr(errors, name) if not inspect.isclass(klass): continue if not issubclass(klass, base_klass): continue if klass is base_klass: continue if klass.internal_error: continue fmt = getattr(klass, "_fmt", None) if fmt: note(gettext("Exporting message from error: %s"), name) exporter.poentry_in_context(context, fmt) def _help_topics(exporter): topic_registry = help_topics.topic_registry for key in topic_registry.keys(): doc = topic_registry.get(key) if isinstance(doc, str): exporter.poentry_per_paragraph( 'dummy/help_topics/'+key+'/detail.txt', 1, doc) elif callable(doc): # help topics from files exporter.poentry_per_paragraph( 'en/help_topics/'+key+'.txt', 1, doc(key)) summary = topic_registry.get_summary(key) if summary is not None: exporter.poentry('dummy/help_topics/'+key+'/summary.txt', 1, summary) def export_pot(outf, plugin=None, include_duplicates=False): exporter = _PotExporter(outf, include_duplicates) if plugin is None: _standard_options(exporter) _command_helps(exporter) _error_messages(exporter) _help_topics(exporter) else: _command_helps(exporter, plugin) bzr-2.7.0/bzrlib/externalcommand.py0000644000000000000000000000415511673403246015475 0ustar 00000000000000# Copyright (C) 2004, 2005 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import # TODO: Perhaps rather than mapping options and arguments back and # forth, we should just pass in the whole argv, and allow # ExternalCommands to handle it differently to internal commands? 
import os from bzrlib.commands import Command class ExternalCommand(Command): """Class to wrap external commands.""" @classmethod def find_command(cls, cmd): import os.path bzrpath = os.environ.get('BZRPATH', '') for dir in bzrpath.split(os.pathsep): ## Empty directories are not real paths if not dir: continue # This needs to be os.path.join() or windows cannot # find the batch file that you are wanting to execute path = os.path.join(dir, cmd) if os.path.isfile(path): return ExternalCommand(path) return None def __init__(self, path): self.path = path def name(self): return os.path.basename(self.path) def run(self, *args, **kwargs): raise NotImplementedError('should not be called on %r' % self) def run_argv_aliases(self, argv, alias_argv=None): return os.spawnv(os.P_WAIT, self.path, [self.path] + argv) def help(self): m = 'external command from %s\n\n' % self.path pipe = os.popen('%s --help' % self.path) return m + pipe.read() bzr-2.7.0/bzrlib/fetch.py0000644000000000000000000004354311707554770013420 0ustar 00000000000000# Copyright (C) 2005-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Copying of history from one branch to another. The basic plan is that every branch knows the history of everything that has merged into it. As the first step of a merge, pull, or branch operation we copy history from the source into the destination branch. """ from __future__ import absolute_import import operator from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ from bzrlib import ( tsort, versionedfile, vf_search, ) """) from bzrlib import ( errors, ui, ) from bzrlib.i18n import gettext from bzrlib.revision import NULL_REVISION from bzrlib.trace import mutter class RepoFetcher(object): """Pull revisions and texts from one repository to another. This should not be used directly, it's essential a object to encapsulate the logic in InterRepository.fetch(). """ def __init__(self, to_repository, from_repository, last_revision=None, find_ghosts=True, fetch_spec=None): """Create a repo fetcher. :param last_revision: If set, try to limit to the data this revision references. :param fetch_spec: A SearchResult specifying which revisions to fetch. If set, this overrides last_revision. :param find_ghosts: If True search the entire history for ghosts. """ # repository.fetch has the responsibility for short-circuiting # attempts to copy between a repository and itself. 
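        # Record both ends of the fetch; the source repository is kept
        # read-locked for the duration of __fetch() below.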
self.to_repository = to_repository self.from_repository = from_repository self.sink = to_repository._get_sink() # must not mutate self._last_revision as its potentially a shared instance self._last_revision = last_revision self._fetch_spec = fetch_spec self.find_ghosts = find_ghosts self.from_repository.lock_read() mutter("Using fetch logic to copy between %s(%s) and %s(%s)", self.from_repository, self.from_repository._format, self.to_repository, self.to_repository._format) try: self.__fetch() finally: self.from_repository.unlock() def __fetch(self): """Primary worker function. This initialises all the needed variables, and then fetches the requested revisions, finally clearing the progress bar. """ # Roughly this is what we're aiming for fetch to become: # # missing = self.sink.insert_stream(self.source.get_stream(search)) # if missing: # missing = self.sink.insert_stream(self.source.get_items(missing)) # assert not missing self.count_total = 0 self.file_ids_names = {} pb = ui.ui_factory.nested_progress_bar() pb.show_pct = pb.show_count = False try: pb.update(gettext("Finding revisions"), 0, 2) search_result = self._revids_to_fetch() mutter('fetching: %s', search_result) if search_result.is_empty(): return pb.update(gettext("Fetching revisions"), 1, 2) self._fetch_everything_for_search(search_result) finally: pb.finished() def _fetch_everything_for_search(self, search): """Fetch all data for the given set of revisions.""" # The first phase is "file". We pass the progress bar for it directly # into item_keys_introduced_by, which has more information about how # that phase is progressing than we do. Progress updates for the other # phases are taken care of in this function. # XXX: there should be a clear owner of the progress reporting. Perhaps # item_keys_introduced_by should have a richer API than it does at the # moment, so that it can feed the progress information back to this # function? if (self.from_repository._format.rich_root_data and not self.to_repository._format.rich_root_data): raise errors.IncompatibleRepositories( self.from_repository, self.to_repository, "different rich-root support") pb = ui.ui_factory.nested_progress_bar() try: pb.update("Get stream source") source = self.from_repository._get_source( self.to_repository._format) stream = source.get_stream(search) from_format = self.from_repository._format pb.update("Inserting stream") resume_tokens, missing_keys = self.sink.insert_stream( stream, from_format, []) if missing_keys: pb.update("Missing keys") stream = source.get_stream_for_missing_keys(missing_keys) pb.update("Inserting missing keys") resume_tokens, missing_keys = self.sink.insert_stream( stream, from_format, resume_tokens) if missing_keys: raise AssertionError( "second push failed to complete a fetch %r." % ( missing_keys,)) if resume_tokens: raise AssertionError( "second push failed to commit the fetch %r." % ( resume_tokens,)) pb.update("Finishing stream") self.sink.finished() finally: pb.finished() def _revids_to_fetch(self): """Determines the exact revisions needed from self.from_repository to install self._last_revision in self.to_repository. :returns: A SearchResult of some sort. (Possibly a PendingAncestryResult, EmptySearchResult, etc.) """ if self._fetch_spec is not None: # The fetch spec is already a concrete search result. return self._fetch_spec elif self._last_revision == NULL_REVISION: # fetch_spec is None + last_revision is null => empty fetch. 
# explicit limit of no revisions needed return vf_search.EmptySearchResult() elif self._last_revision is not None: return vf_search.NotInOtherForRevs(self.to_repository, self.from_repository, [self._last_revision], find_ghosts=self.find_ghosts).execute() else: # self._last_revision is None: return vf_search.EverythingNotInOther(self.to_repository, self.from_repository, find_ghosts=self.find_ghosts).execute() class Inter1and2Helper(object): """Helper for operations that convert data from model 1 and 2 This is for use by fetchers and converters. """ # This is a class variable so that the test suite can override it. known_graph_threshold = 100 def __init__(self, source): """Constructor. :param source: The repository data comes from """ self.source = source def iter_rev_trees(self, revs): """Iterate through RevisionTrees efficiently. Additionally, the inventory's revision_id is set if unset. Trees are retrieved in batches of 100, and then yielded in the order they were requested. :param revs: A list of revision ids """ # In case that revs is not a list. revs = list(revs) while revs: for tree in self.source.revision_trees(revs[:100]): if tree.root_inventory.revision_id is None: tree.root_inventory.revision_id = tree.get_revision_id() yield tree revs = revs[100:] def _find_root_ids(self, revs, parent_map, graph): revision_root = {} for tree in self.iter_rev_trees(revs): root_id = tree.get_root_id() revision_id = tree.get_file_revision(root_id, u"") revision_root[revision_id] = root_id # Find out which parents we don't already know root ids for parents = set() for revision_parents in parent_map.itervalues(): parents.update(revision_parents) parents.difference_update(revision_root.keys() + [NULL_REVISION]) # Limit to revisions present in the versionedfile parents = graph.get_parent_map(parents).keys() for tree in self.iter_rev_trees(parents): root_id = tree.get_root_id() revision_root[tree.get_revision_id()] = root_id return revision_root def generate_root_texts(self, revs): """Generate VersionedFiles for all root ids. :param revs: the revisions to include """ graph = self.source.get_graph() parent_map = graph.get_parent_map(revs) rev_order = tsort.topo_sort(parent_map) rev_id_to_root_id = self._find_root_ids(revs, parent_map, graph) root_id_order = [(rev_id_to_root_id[rev_id], rev_id) for rev_id in rev_order] # Guaranteed stable, this groups all the file id operations together # retaining topological order within the revisions of a file id. # File id splits and joins would invalidate this, but they don't exist # yet, and are unlikely to in non-rich-root environments anyway. root_id_order.sort(key=operator.itemgetter(0)) # Create a record stream containing the roots to create. if len(revs) > self.known_graph_threshold: graph = self.source.get_known_graph_ancestry(revs) new_roots_stream = _new_root_data_stream( root_id_order, rev_id_to_root_id, parent_map, self.source, graph) return [('texts', new_roots_stream)] def _new_root_data_stream( root_keys_to_create, rev_id_to_root_id_map, parent_map, repo, graph=None): """Generate a texts substream of synthesised root entries. Used in fetches that do rich-root upgrades. :param root_keys_to_create: iterable of (root_id, rev_id) pairs describing the root entries to create. :param rev_id_to_root_id_map: dict of known rev_id -> root_id mappings for calculating the parents. If a parent rev_id is not found here then it will be recalculated. :param parent_map: a parent map for all the revisions in root_keys_to_create. 
:param graph: a graph to use instead of repo.get_graph(). """ for root_key in root_keys_to_create: root_id, rev_id = root_key parent_keys = _parent_keys_for_root_version( root_id, rev_id, rev_id_to_root_id_map, parent_map, repo, graph) yield versionedfile.FulltextContentFactory( root_key, parent_keys, None, '') def _parent_keys_for_root_version( root_id, rev_id, rev_id_to_root_id_map, parent_map, repo, graph=None): """Get the parent keys for a given root id. A helper function for _new_root_data_stream. """ # Include direct parents of the revision, but only if they used the same # root_id and are heads. rev_parents = parent_map[rev_id] parent_ids = [] for parent_id in rev_parents: if parent_id == NULL_REVISION: continue if parent_id not in rev_id_to_root_id_map: # We probably didn't read this revision, go spend the extra effort # to actually check try: tree = repo.revision_tree(parent_id) except errors.NoSuchRevision: # Ghost, fill out rev_id_to_root_id in case we encounter this # again. # But set parent_root_id to None since we don't really know parent_root_id = None else: parent_root_id = tree.get_root_id() rev_id_to_root_id_map[parent_id] = None # XXX: why not: # rev_id_to_root_id_map[parent_id] = parent_root_id # memory consumption maybe? else: parent_root_id = rev_id_to_root_id_map[parent_id] if root_id == parent_root_id: # With stacking we _might_ want to refer to a non-local revision, # but this code path only applies when we have the full content # available, so ghosts really are ghosts, not just the edge of # local data. parent_ids.append(parent_id) else: # root_id may be in the parent anyway. try: tree = repo.revision_tree(parent_id) except errors.NoSuchRevision: # ghost, can't refer to it. pass else: try: parent_ids.append(tree.get_file_revision(root_id)) except errors.NoSuchId: # not in the tree pass # Drop non-head parents if graph is None: graph = repo.get_graph() heads = graph.heads(parent_ids) selected_ids = [] for parent_id in parent_ids: if parent_id in heads and parent_id not in selected_ids: selected_ids.append(parent_id) parent_keys = [(root_id, parent_id) for parent_id in selected_ids] return parent_keys class TargetRepoKinds(object): """An enum-like set of constants. They are the possible values of FetchSpecFactory.target_repo_kinds. """ PREEXISTING = 'preexisting' STACKED = 'stacked' EMPTY = 'empty' class FetchSpecFactory(object): """A helper for building the best fetch spec for a sprout call. Factors that go into determining the sort of fetch to perform: * did the caller specify any revision IDs? * did the caller specify a source branch (need to fetch its heads_to_fetch(), usually the tip + tags) * is there an existing target repo (don't need to refetch revs it already has) * target is stacked? (similar to pre-existing target repo: even if the target itself is new don't want to refetch existing revs) :ivar source_branch: the source branch if one specified, else None. :ivar source_branch_stop_revision_id: fetch up to this revision of source_branch, rather than its tip. :ivar source_repo: the source repository if one found, else None. :ivar target_repo: the target repository acquired by sprout. :ivar target_repo_kind: one of the TargetRepoKinds constants. 
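
    Illustrative use (a sketch of a typical caller, not taken from this
    module)::

      factory = FetchSpecFactory()
      factory.source_branch = source_branch
      factory.source_repo = source_branch.repository
      factory.target_repo = target_repo
      factory.target_repo_kind = TargetRepoKinds.PREEXISTING
      fetch_spec = factory.make_fetch_spec()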
""" def __init__(self): self._explicit_rev_ids = set() self.source_branch = None self.source_branch_stop_revision_id = None self.source_repo = None self.target_repo = None self.target_repo_kind = None self.limit = None def add_revision_ids(self, revision_ids): """Add revision_ids to the set of revision_ids to be fetched.""" self._explicit_rev_ids.update(revision_ids) def make_fetch_spec(self): """Build a SearchResult or PendingAncestryResult or etc.""" if self.target_repo_kind is None or self.source_repo is None: raise AssertionError( 'Incomplete FetchSpecFactory: %r' % (self.__dict__,)) if len(self._explicit_rev_ids) == 0 and self.source_branch is None: if self.limit is not None: raise NotImplementedError( "limit is only supported with a source branch set") # Caller hasn't specified any revisions or source branch if self.target_repo_kind == TargetRepoKinds.EMPTY: return vf_search.EverythingResult(self.source_repo) else: # We want everything not already in the target (or target's # fallbacks). return vf_search.EverythingNotInOther( self.target_repo, self.source_repo).execute() heads_to_fetch = set(self._explicit_rev_ids) if self.source_branch is not None: must_fetch, if_present_fetch = self.source_branch.heads_to_fetch() if self.source_branch_stop_revision_id is not None: # Replace the tip rev from must_fetch with the stop revision # XXX: this might be wrong if the tip rev is also in the # must_fetch set for other reasons (e.g. it's the tip of # multiple loom threads?), but then it's pretty unclear what it # should mean to specify a stop_revision in that case anyway. must_fetch.discard(self.source_branch.last_revision()) must_fetch.add(self.source_branch_stop_revision_id) heads_to_fetch.update(must_fetch) else: if_present_fetch = set() if self.target_repo_kind == TargetRepoKinds.EMPTY: # PendingAncestryResult does not raise errors if a requested head # is absent. Ideally it would support the # required_ids/if_present_ids distinction, but in practice # heads_to_fetch will almost certainly be present so this doesn't # matter much. all_heads = heads_to_fetch.union(if_present_fetch) ret = vf_search.PendingAncestryResult(all_heads, self.source_repo) if self.limit is not None: graph = self.source_repo.get_graph() topo_order = list(graph.iter_topo_order(ret.get_keys())) result_set = topo_order[:self.limit] ret = self.source_repo.revision_ids_to_search_result(result_set) return ret else: return vf_search.NotInOtherForRevs(self.target_repo, self.source_repo, required_ids=heads_to_fetch, if_present_ids=if_present_fetch, limit=self.limit).execute() bzr-2.7.0/bzrlib/fifo_cache.py0000644000000000000000000002433011673635356014370 0ustar 00000000000000# Copyright (C) 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """A simple first-in-first-out (FIFO) cache.""" from __future__ import absolute_import from collections import deque class FIFOCache(dict): """A class which manages a cache of entries, removing old ones.""" def __init__(self, max_cache=100, after_cleanup_count=None): dict.__init__(self) self._max_cache = max_cache if after_cleanup_count is None: self._after_cleanup_count = self._max_cache * 8 / 10 else: self._after_cleanup_count = min(after_cleanup_count, self._max_cache) self._cleanup = {} # map to cleanup functions when items are removed self._queue = deque() # Track when things are accessed def __setitem__(self, key, value): """Add a value to the cache, there will be no cleanup function.""" self.add(key, value, cleanup=None) def __delitem__(self, key): # Remove the key from an arbitrary location in the queue remove = getattr(self._queue, 'remove', None) # Python2.5's has deque.remove, but Python2.4 does not if remove is not None: remove(key) else: # TODO: It would probably be faster to pop()/popleft() until we get to the # key, and then insert those back into the queue. We know # the key should only be present in one position, and we # wouldn't need to rebuild the whole queue. self._queue = deque([k for k in self._queue if k != key]) self._remove(key) def add(self, key, value, cleanup=None): """Add a new value to the cache. Also, if the entry is ever removed from the queue, call cleanup. Passing it the key and value being removed. :param key: The key to store it under :param value: The object to store :param cleanup: None or a function taking (key, value) to indicate 'value' should be cleaned up """ if key in self: # Remove the earlier reference to this key, adding it again bumps # it to the end of the queue del self[key] self._queue.append(key) dict.__setitem__(self, key, value) if cleanup is not None: self._cleanup[key] = cleanup if len(self) > self._max_cache: self.cleanup() def cache_size(self): """Get the number of entries we will cache.""" return self._max_cache def cleanup(self): """Clear the cache until it shrinks to the requested size. This does not completely wipe the cache, just makes sure it is under the after_cleanup_count. """ # Make sure the cache is shrunk to the correct size while len(self) > self._after_cleanup_count: self._remove_oldest() if len(self._queue) != len(self): raise AssertionError('The length of the queue should always equal' ' the length of the dict. %s != %s' % (len(self._queue), len(self))) def clear(self): """Clear out all of the cache.""" # Clean up in FIFO order while self: self._remove_oldest() def _remove(self, key): """Remove an entry, making sure to call any cleanup function.""" cleanup = self._cleanup.pop(key, None) # We override self.pop() because it doesn't play well with cleanup # functions. val = dict.pop(self, key) if cleanup is not None: cleanup(key, val) return val def _remove_oldest(self): """Remove the oldest entry.""" key = self._queue.popleft() self._remove(key) def resize(self, max_cache, after_cleanup_count=None): """Increase/decrease the number of cached entries. :param max_cache: The maximum number of entries to cache. :param after_cleanup_count: After cleanup, we should have at most this many entries. This defaults to 80% of max_cache. 
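
        For example (illustrative): resize(10) with no explicit
        after_cleanup_count allows 10 cached entries and, when cleanup runs,
        trims the cache back to at most 8 entries (80% of the new maximum).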
""" self._max_cache = max_cache if after_cleanup_count is None: self._after_cleanup_count = max_cache * 8 / 10 else: self._after_cleanup_count = min(max_cache, after_cleanup_count) if len(self) > self._max_cache: self.cleanup() # raise NotImplementedError on dict functions that would mutate the cache # which have not been properly implemented yet. def copy(self): raise NotImplementedError(self.copy) def pop(self, key, default=None): # If there is a cleanup() function, than it is unclear what pop() # should do. Specifically, we would have to call the cleanup on the # value before we return it, which should cause whatever resources were # allocated to be removed, which makes the return value fairly useless. # So instead, we just don't implement it. raise NotImplementedError(self.pop) def popitem(self): # See pop() raise NotImplementedError(self.popitem) def setdefault(self, key, defaultval=None): """similar to dict.setdefault""" if key in self: return self[key] self[key] = defaultval return defaultval def update(self, *args, **kwargs): """Similar to dict.update()""" if len(args) == 1: arg = args[0] if isinstance(arg, dict): for key, val in arg.iteritems(): self.add(key, val) else: for key, val in args[0]: self.add(key, val) elif len(args) > 1: raise TypeError('update expected at most 1 argument, got %d' % len(args)) if kwargs: for key, val in kwargs.iteritems(): self.add(key, val) class FIFOSizeCache(FIFOCache): """An FIFOCache that removes things based on the size of the values. This differs in that it doesn't care how many actual items there are, it restricts the cache to be cleaned based on the size of the data. """ def __init__(self, max_size=1024*1024, after_cleanup_size=None, compute_size=None): """Create a new FIFOSizeCache. :param max_size: The max number of bytes to store before we start clearing out entries. :param after_cleanup_size: After cleaning up, shrink everything to this size (defaults to 80% of max_size). :param compute_size: A function to compute the size of a value. If not supplied we default to 'len'. """ # Arbitrary, we won't really be using the value anyway. FIFOCache.__init__(self, max_cache=max_size) self._max_size = max_size if after_cleanup_size is None: self._after_cleanup_size = self._max_size * 8 / 10 else: self._after_cleanup_size = min(after_cleanup_size, self._max_size) self._value_size = 0 self._compute_size = compute_size if compute_size is None: self._compute_size = len def add(self, key, value, cleanup=None): """Add a new value to the cache. Also, if the entry is ever removed from the queue, call cleanup. Passing it the key and value being removed. :param key: The key to store it under :param value: The object to store, this value by itself is >= after_cleanup_size, then we will not store it at all. :param cleanup: None or a function taking (key, value) to indicate 'value' sohuld be cleaned up. """ # Even if the new value won't be stored, we need to remove the old # value if key in self: # Remove the earlier reference to this key, adding it again bumps # it to the end of the queue del self[key] value_len = self._compute_size(value) if value_len >= self._after_cleanup_size: return self._queue.append(key) dict.__setitem__(self, key, value) if cleanup is not None: self._cleanup[key] = cleanup self._value_size += value_len if self._value_size > self._max_size: # Time to cleanup self.cleanup() def cache_size(self): """Get the number of bytes we will cache.""" return self._max_size def cleanup(self): """Clear the cache until it shrinks to the requested size. 
This does not completely wipe the cache, just makes sure it is under the after_cleanup_size. """ # Make sure the cache is shrunk to the correct size while self._value_size > self._after_cleanup_size: self._remove_oldest() def _remove(self, key): """Remove an entry, making sure to maintain the invariants.""" val = FIFOCache._remove(self, key) self._value_size -= self._compute_size(val) return val def resize(self, max_size, after_cleanup_size=None): """Increase/decrease the amount of cached data. :param max_size: The maximum number of bytes to cache. :param after_cleanup_size: After cleanup, we should have at most this many bytes cached. This defaults to 80% of max_size. """ FIFOCache.resize(self, max_size) self._max_size = max_size if after_cleanup_size is None: self._after_cleanup_size = max_size * 8 / 10 else: self._after_cleanup_size = min(max_size, after_cleanup_size) if self._value_size > self._max_size: self.cleanup() bzr-2.7.0/bzrlib/filter_tree.py0000644000000000000000000000552012006505514014605 0ustar 00000000000000# Copyright (C) 2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Content-filtered view of any tree. """ from __future__ import absolute_import from bzrlib import ( tree, ) from bzrlib.filters import ( ContentFilterContext, filtered_output_bytes, ) class ContentFilterTree(tree.Tree): """A virtual tree that applies content filters to an underlying tree. Not every operation is supported yet. """ def __init__(self, backing_tree, filter_stack_callback): """Construct a new filtered tree view. :param filter_stack_callback: A callable taking a path that returns the filter stack that should be used for that path. :param backing_tree: An underlying tree to wrap. """ self.backing_tree = backing_tree self.filter_stack_callback = filter_stack_callback def get_file_text(self, file_id, path=None): chunks = self.backing_tree.get_file_lines(file_id, path) if path is None: path = self.backing_tree.id2path(file_id) filters = self.filter_stack_callback(path) context = ContentFilterContext(path, self, None) contents = filtered_output_bytes(chunks, filters, context) content = ''.join(contents) return content def has_filename(self, filename): return self.backing_tree.has_filename def is_executable(self, file_id, path=None): return self.backing_tree.is_executable(file_id, path) def iter_entries_by_dir(self, specific_file_ids=None, yield_parents=None): # NB: This simply returns the parent tree's entries; the length may be # wrong but it can't easily be calculated without filtering the whole # text. Currently all callers cope with this; perhaps they should be # updated to a narrower interface that only provides things guaranteed # cheaply available across all trees. 
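# Illustrative sketch (hedged): wrapping a tree so file content is run
# through a content-filter stack on read.  `backing_tree` and `file_id` are
# assumed to come from an existing branch or working tree; an empty filter
# stack makes the view behave like the underlying tree.
def _example_filtered_read(backing_tree, file_id):
    filtered = ContentFilterTree(backing_tree, lambda path: [])
    filtered.lock_read()
    try:
        return filtered.get_file_text(file_id)
    finally:
        filtered.unlock()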
-- mbp 20110705 return self.backing_tree.iter_entries_by_dir( specific_file_ids=specific_file_ids, yield_parents=yield_parents) def lock_read(self): return self.backing_tree.lock_read() def unlock(self): return self.backing_tree.unlock() bzr-2.7.0/bzrlib/filters/0000755000000000000000000000000011001604172013367 5ustar 00000000000000bzr-2.7.0/bzrlib/foreign.py0000644000000000000000000003021411705551217013736 0ustar 00000000000000# Copyright (C) 2008-2012 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Foreign branch utilities.""" from __future__ import absolute_import from bzrlib.branch import ( Branch, ) from bzrlib.commands import Command, Option from bzrlib.repository import Repository from bzrlib.revision import Revision from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ from bzrlib import ( errors, registry, transform, ) from bzrlib.i18n import gettext """) class VcsMapping(object): """Describes the mapping between the semantics of Bazaar and a foreign VCS. """ # Whether this is an experimental mapping that is still open to changes. experimental = False # Whether this mapping supports exporting and importing all bzr semantics. roundtripping = False # Prefix used when importing revisions native to the foreign VCS (as # opposed to roundtripping bzr-native revisions) using this mapping. revid_prefix = None def __init__(self, vcs): """Create a new VcsMapping. :param vcs: VCS that this mapping maps to Bazaar """ self.vcs = vcs def revision_id_bzr_to_foreign(self, bzr_revid): """Parse a bzr revision id and convert it to a foreign revid. :param bzr_revid: The bzr revision id (a string). :return: A foreign revision id, can be any sort of object. """ raise NotImplementedError(self.revision_id_bzr_to_foreign) def revision_id_foreign_to_bzr(self, foreign_revid): """Parse a foreign revision id and convert it to a bzr revid. :param foreign_revid: Foreign revision id, can be any sort of object. :return: A bzr revision id. """ raise NotImplementedError(self.revision_id_foreign_to_bzr) class VcsMappingRegistry(registry.Registry): """Registry for Bazaar<->foreign VCS mappings. There should be one instance of this registry for every foreign VCS. """ def register(self, key, factory, help): """Register a mapping between Bazaar and foreign VCS semantics. The factory must be a callable that takes one parameter: the key. It must produce an instance of VcsMapping when called. """ if ":" in key: raise ValueError("mapping name can not contain colon (:)") registry.Registry.register(self, key, factory, help) def set_default(self, key): """Set the 'default' key to be a clone of the supplied key. This method must be called once and only once. 
""" self._set_default_key(key) def get_default(self): """Convenience function for obtaining the default mapping to use.""" return self.get(self._get_default_key()) def revision_id_bzr_to_foreign(self, revid): """Convert a bzr revision id to a foreign revid.""" raise NotImplementedError(self.revision_id_bzr_to_foreign) class ForeignRevision(Revision): """A Revision from a Foreign repository. Remembers information about foreign revision id and mapping. """ def __init__(self, foreign_revid, mapping, *args, **kwargs): if not "inventory_sha1" in kwargs: kwargs["inventory_sha1"] = "" super(ForeignRevision, self).__init__(*args, **kwargs) self.foreign_revid = foreign_revid self.mapping = mapping class ForeignVcs(object): """A foreign version control system.""" branch_format = None repository_format = None def __init__(self, mapping_registry, abbreviation=None): """Create a new foreign vcs instance. :param mapping_registry: Registry with mappings for this VCS. :param abbreviation: Optional abbreviation ('bzr', 'svn', 'git', etc) """ self.abbreviation = abbreviation self.mapping_registry = mapping_registry def show_foreign_revid(self, foreign_revid): """Prepare a foreign revision id for formatting using bzr log. :param foreign_revid: Foreign revision id. :return: Dictionary mapping string keys to string values. """ return { } def serialize_foreign_revid(self, foreign_revid): """Serialize a foreign revision id for this VCS. :param foreign_revid: Foreign revision id :return: Bytestring with serialized revid, will not contain any newlines. """ raise NotImplementedError(self.serialize_foreign_revid) class ForeignVcsRegistry(registry.Registry): """Registry for Foreign VCSes. There should be one entry per foreign VCS. Example entries would be "git", "svn", "hg", "darcs", etc. """ def register(self, key, foreign_vcs, help): """Register a foreign VCS. :param key: Prefix of the foreign VCS in revision ids :param foreign_vcs: ForeignVCS instance :param help: Description of the foreign VCS """ if ":" in key or "-" in key: raise ValueError("vcs name can not contain : or -") registry.Registry.register(self, key, foreign_vcs, help) def parse_revision_id(self, revid): """Parse a bzr revision and return the matching mapping and foreign revid. :param revid: The bzr revision id :return: tuple with foreign revid and vcs mapping """ if not ":" in revid or not "-" in revid: raise errors.InvalidRevisionId(revid, None) try: foreign_vcs = self.get(revid.split("-")[0]) except KeyError: raise errors.InvalidRevisionId(revid, None) return foreign_vcs.mapping_registry.revision_id_bzr_to_foreign(revid) foreign_vcs_registry = ForeignVcsRegistry() class ForeignRepository(Repository): """A Repository that exists in a foreign version control system. The data in this repository can not be represented natively using Bazaars internal datastructures, but have to converted using a VcsMapping. """ # This repository's native version control system vcs = None def has_foreign_revision(self, foreign_revid): """Check whether the specified foreign revision is present. :param foreign_revid: A foreign revision id, in the format used by this Repository's VCS. """ raise NotImplementedError(self.has_foreign_revision) def lookup_bzr_revision_id(self, revid): """Lookup a mapped or roundtripped revision by revision id. :param revid: Bazaar revision id :return: Tuple with foreign revision id and mapping. 
""" raise NotImplementedError(self.lookup_revision_id) def all_revision_ids(self, mapping=None): """See Repository.all_revision_ids().""" raise NotImplementedError(self.all_revision_ids) def get_default_mapping(self): """Get the default mapping for this repository.""" raise NotImplementedError(self.get_default_mapping) class ForeignBranch(Branch): """Branch that exists in a foreign version control system.""" def __init__(self, mapping): self.mapping = mapping super(ForeignBranch, self).__init__() def update_workingtree_fileids(wt, target_tree): """Update the file ids in a working tree based on another tree. :param wt: Working tree in which to update file ids :param target_tree: Tree to retrieve new file ids from, based on path """ tt = transform.TreeTransform(wt) try: for f, p, c, v, d, n, k, e in target_tree.iter_changes(wt): if v == (True, False): trans_id = tt.trans_id_tree_path(p[0]) tt.unversion_file(trans_id) elif v == (False, True): trans_id = tt.trans_id_tree_path(p[1]) tt.version_file(f, trans_id) tt.apply() finally: tt.finalize() if len(wt.get_parent_ids()) == 1: wt.set_parent_trees([(target_tree.get_revision_id(), target_tree)]) else: wt.set_last_revision(target_tree.get_revision_id()) class cmd_dpush(Command): __doc__ = """Push into a different VCS without any custom bzr metadata. This will afterwards rebase the local branch on the remote branch unless the --no-rebase option is used, in which case the two branches will be out of sync after the push. """ takes_args = ['location?'] takes_options = [ 'remember', Option('directory', help='Branch to push from, ' 'rather than the one containing the working directory.', short_name='d', type=unicode, ), Option('no-rebase', help="Do not rebase after push."), Option('strict', help='Refuse to push if there are uncommitted changes in' ' the working tree, --no-strict disables the check.'), ] def run(self, location=None, remember=False, directory=None, no_rebase=False, strict=None): from bzrlib import urlutils from bzrlib.controldir import ControlDir from bzrlib.errors import BzrCommandError, NoWorkingTree from bzrlib.workingtree import WorkingTree if directory is None: directory = "." try: source_wt = WorkingTree.open_containing(directory)[0] source_branch = source_wt.branch except NoWorkingTree: source_branch = Branch.open(directory) source_wt = None if source_wt is not None: source_wt.check_changed_or_out_of_date( strict, 'dpush_strict', more_error='Use --no-strict to force the push.', more_warning='Uncommitted changes will not be pushed.') stored_loc = source_branch.get_push_location() if location is None: if stored_loc is None: raise BzrCommandError(gettext("No push location known or specified.")) else: display_url = urlutils.unescape_for_display(stored_loc, self.outf.encoding) self.outf.write( gettext("Using saved location: %s\n") % display_url) location = stored_loc controldir = ControlDir.open(location) target_branch = controldir.open_branch() target_branch.lock_write() try: try: push_result = source_branch.push(target_branch, lossy=True) except errors.LossyPushToSameVCS: raise BzrCommandError(gettext("{0!r} and {1!r} are in the same VCS, lossy " "push not necessary. Please use regular push.").format( source_branch, target_branch)) # We successfully created the target, remember it if source_branch.get_push_location() is None or remember: # FIXME: Should be done only if we succeed ? 
-- vila 2012-01-18 source_branch.set_push_location(target_branch.base) if not no_rebase: old_last_revid = source_branch.last_revision() source_branch.pull(target_branch, overwrite=True) new_last_revid = source_branch.last_revision() if source_wt is not None and old_last_revid != new_last_revid: source_wt.lock_write() try: target = source_wt.branch.repository.revision_tree( new_last_revid) update_workingtree_fileids(source_wt, target) finally: source_wt.unlock() push_result.report(self.outf) finally: target_branch.unlock() bzr-2.7.0/bzrlib/generate_ids.py0000644000000000000000000001073211673635356014754 0ustar 00000000000000# Copyright (C) 2006, 2007, 2009, 2010, 2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Common code for generating file or revision ids.""" from __future__ import absolute_import from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ import time from bzrlib import ( config, errors, osutils, ) """) from bzrlib import ( lazy_regex, ) # the regex removes any weird characters; we don't escape them # but rather just pull them out _file_id_chars_re = lazy_regex.lazy_compile(r'[^\w.]') _rev_id_chars_re = lazy_regex.lazy_compile(r'[^-\w.+@]') _gen_file_id_suffix = None _gen_file_id_serial = 0 def _next_id_suffix(): """Create a new file id suffix that is reasonably unique. On the first call we combine the current time with 64 bits of randomness to give a highly probably globally unique number. Then each call in the same process adds 1 to a serial number we append to that unique value. """ # XXX TODO: change bzrlib.add.smart_add_tree to call workingtree.add() rather # than having to move the id randomness out of the inner loop like this. # XXX TODO: for the global randomness this uses we should add the thread-id # before the serial #. # XXX TODO: jam 20061102 I think it would be good to reset every 100 or # 1000 calls, or perhaps if time.time() increases by a certain # amount. time.time() shouldn't be terribly expensive to call, # and it means that long-lived processes wouldn't use the same # suffix forever. global _gen_file_id_suffix, _gen_file_id_serial if _gen_file_id_suffix is None: _gen_file_id_suffix = "-%s-%s-" % (osutils.compact_date(time.time()), osutils.rand_chars(16)) _gen_file_id_serial += 1 return _gen_file_id_suffix + str(_gen_file_id_serial) def gen_file_id(name): """Return new file id for the basename 'name'. The uniqueness is supplied from _next_id_suffix. """ # The real randomness is in the _next_id_suffix, the # rest of the identifier is just to be nice. # So we: # 1) Remove non-ascii word characters to keep the ids portable # 2) squash to lowercase, so the file id doesn't have to # be escaped (case insensitive filesystems would bork for ids # that only differ in case without escaping). # 3) truncate the filename to 20 chars. 
Long filenames also bork on some # filesystems # 4) Removing starting '.' characters to prevent the file ids from # being considered hidden. ascii_word_only = str(_file_id_chars_re.sub('', name.lower())) short_no_dots = ascii_word_only.lstrip('.')[:20] return short_no_dots + _next_id_suffix() def gen_root_id(): """Return a new tree-root file id.""" return gen_file_id('tree_root') def gen_revision_id(username, timestamp=None): """Return new revision-id. :param username: The username of the committer, in the format returned by config.username(). This is typically a real name, followed by an email address. If found, we will use just the email address portion. Otherwise we flatten the real name, and use that. :return: A new revision id. """ try: user_or_email = config.extract_email_address(username) except errors.NoEmailInUsername: user_or_email = username user_or_email = user_or_email.lower() user_or_email = user_or_email.replace(' ', '_') user_or_email = _rev_id_chars_re.sub('', user_or_email) # This gives 36^16 ~= 2^82.7 ~= 83 bits of entropy unique_chunk = osutils.rand_chars(16) if timestamp is None: timestamp = time.time() rev_id = u'-'.join((user_or_email, osutils.compact_date(timestamp), unique_chunk)) return rev_id.encode('utf8') bzr-2.7.0/bzrlib/globbing.py0000644000000000000000000003036211673635356014107 0ustar 00000000000000# Copyright (C) 2006-2011 Canonical Ltd # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Tools for converting globs to regular expressions. This module provides functions for converting shell-like globs to regular expressions. """ from __future__ import absolute_import import re from bzrlib import ( errors, lazy_regex, ) from bzrlib.trace import ( mutter, warning, ) class Replacer(object): """Do a multiple-pattern substitution. The patterns and substitutions are combined into one, so the result of one replacement is never substituted again. Add the patterns and replacements via the add method and then call the object. The patterns must not contain capturing groups. """ _expand = lazy_regex.lazy_compile(ur'\\&') def __init__(self, source=None): self._pat = None if source: self._pats = list(source._pats) self._funs = list(source._funs) else: self._pats = [] self._funs = [] def add(self, pat, fun): r"""Add a pattern and replacement. The pattern must not contain capturing groups. The replacement might be either a string template in which \& will be replaced with the match, or a function that will get the matching text as argument. It does not get match object, because capturing is forbidden anyway. """ self._pat = None self._pats.append(pat) self._funs.append(fun) def add_replacer(self, replacer): r"""Add all patterns from another replacer. All patterns and replacements from replacer are appended to the ones already defined. 
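# Illustrative sketch (hedged): the generated ids are unique per call, so
# only their shape can be checked.  A file id starts with a sanitised,
# lowercased form of the basename; a revision id starts with the committer's
# email address (or flattened name) followed by a compact date and random
# characters.
def _example_generated_ids():
    file_id = gen_file_id('Hello World.txt')   # starts with 'helloworld.txt-'
    rev_id = gen_revision_id('Jane Doe <jane@example.com>', timestamp=0)
    return file_id, rev_id.startswith('jane@example.com-')   # (..., True)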
""" self._pat = None self._pats.extend(replacer._pats) self._funs.extend(replacer._funs) def __call__(self, text): if not self._pat: self._pat = lazy_regex.lazy_compile( u'|'.join([u'(%s)' % p for p in self._pats]), re.UNICODE) return self._pat.sub(self._do_sub, text) def _do_sub(self, m): fun = self._funs[m.lastindex - 1] if hasattr(fun, '__call__'): return fun(m.group(0)) else: return self._expand.sub(m.group(0), fun) _sub_named = Replacer() _sub_named.add(ur'\[:digit:\]', ur'\d') _sub_named.add(ur'\[:space:\]', ur'\s') _sub_named.add(ur'\[:alnum:\]', ur'\w') _sub_named.add(ur'\[:ascii:\]', ur'\0-\x7f') _sub_named.add(ur'\[:blank:\]', ur' \t') _sub_named.add(ur'\[:cntrl:\]', ur'\0-\x1f\x7f-\x9f') def _sub_group(m): if m[1] in (u'!', u'^'): return u'[^' + _sub_named(m[2:-1]) + u']' return u'[' + _sub_named(m[1:-1]) + u']' def _invalid_regex(repl): def _(m): warning(u"'%s' not allowed within a regular expression. " "Replacing with '%s'" % (m, repl)) return repl return _ def _trailing_backslashes_regex(m): """Check trailing backslashes. Does a head count on trailing backslashes to ensure there isn't an odd one on the end that would escape the brackets we wrap the RE in. """ if (len(m) % 2) != 0: warning(u"Regular expressions cannot end with an odd number of '\\'. " "Dropping the final '\\'.") return m[:-1] return m _sub_re = Replacer() _sub_re.add(u'^RE:', u'') _sub_re.add(u'\((?!\?)', u'(?:') _sub_re.add(u'\(\?P<.*>', _invalid_regex(u'(?:')) _sub_re.add(u'\(\?P=[^)]*\)', _invalid_regex(u'')) _sub_re.add(ur'\\+$', _trailing_backslashes_regex) _sub_fullpath = Replacer() _sub_fullpath.add(ur'^RE:.*', _sub_re) # RE: is a regex _sub_fullpath.add(ur'\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]', _sub_group) # char group _sub_fullpath.add(ur'(?:(?<=/)|^)(?:\.?/)+', u'') # canonicalize path _sub_fullpath.add(ur'\\.', ur'\&') # keep anything backslashed _sub_fullpath.add(ur'[(){}|^$+.]', ur'\\&') # escape specials _sub_fullpath.add(ur'(?:(?<=/)|^)\*\*+/', ur'(?:.*/)?') # **/ after ^ or / _sub_fullpath.add(ur'\*+', ur'[^/]*') # * elsewhere _sub_fullpath.add(ur'\?', ur'[^/]') # ? everywhere _sub_basename = Replacer() _sub_basename.add(ur'\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]', _sub_group) # char group _sub_basename.add(ur'\\.', ur'\&') # keep anything backslashed _sub_basename.add(ur'[(){}|^$+.]', ur'\\&') # escape specials _sub_basename.add(ur'\*+', ur'.*') # * everywhere _sub_basename.add(ur'\?', ur'.') # ? everywhere def _sub_extension(pattern): return _sub_basename(pattern[2:]) class Globster(object): """A simple wrapper for a set of glob patterns. Provides the capability to search the patterns to find a match for a given filename (including the full path). Patterns are translated to regular expressions to expidite matching. The regular expressions for multiple patterns are aggregated into a super-regex containing groups of up to 99 patterns. The 99 limitation is due to the grouping limit of the Python re module. The resulting super-regex and associated patterns are stored as a list of (regex,[patterns]) in _regex_patterns. For performance reasons the patterns are categorised as extension patterns (those that match against a file extension), basename patterns (those that match against the basename of the filename), and fullpath patterns (those that match against the full path). The translations used for extensions and basenames are relatively simpler and therefore faster to perform than the fullpath patterns. 
Also, the extension patterns are more likely to find a match and so are matched first, then the basename patterns, then the fullpath patterns. """ # We want to _add_patterns in a specific order (as per type_list below) # starting with the shortest and going to the longest. # As some Python version don't support ordered dicts the list below is # used to select inputs for _add_pattern in a specific order. pattern_types = [ "extension", "basename", "fullpath" ] pattern_info = { "extension" : { "translator" : _sub_extension, "prefix" : r'(?:.*/)?(?!.*/)(?:.*\.)' }, "basename" : { "translator" : _sub_basename, "prefix" : r'(?:.*/)?(?!.*/)' }, "fullpath" : { "translator" : _sub_fullpath, "prefix" : r'' }, } def __init__(self, patterns): self._regex_patterns = [] pattern_lists = { "extension" : [], "basename" : [], "fullpath" : [], } for pat in patterns: pat = normalize_pattern(pat) pattern_lists[Globster.identify(pat)].append(pat) pi = Globster.pattern_info for t in Globster.pattern_types: self._add_patterns(pattern_lists[t], pi[t]["translator"], pi[t]["prefix"]) def _add_patterns(self, patterns, translator, prefix=''): while patterns: grouped_rules = [ '(%s)' % translator(pat) for pat in patterns[:99]] joined_rule = '%s(?:%s)$' % (prefix, '|'.join(grouped_rules)) # Explicitly use lazy_compile here, because we count on its # nicer error reporting. self._regex_patterns.append(( lazy_regex.lazy_compile(joined_rule, re.UNICODE), patterns[:99])) patterns = patterns[99:] def match(self, filename): """Searches for a pattern that matches the given filename. :return A matching pattern or None if there is no matching pattern. """ try: for regex, patterns in self._regex_patterns: match = regex.match(filename) if match: return patterns[match.lastindex -1] except errors.InvalidPattern, e: # We can't show the default e.msg to the user as thats for # the combined pattern we sent to regex. Instead we indicate to # the user that an ignore file needs fixing. mutter('Invalid pattern found in regex: %s.', e.msg) e.msg = "File ~/.bazaar/ignore or .bzrignore contains error(s)." bad_patterns = '' for _, patterns in self._regex_patterns: for p in patterns: if not Globster.is_pattern_valid(p): bad_patterns += ('\n %s' % p) e.msg += bad_patterns raise e return None @staticmethod def identify(pattern): """Returns pattern category. :param pattern: normalized pattern. Identify if a pattern is fullpath, basename or extension and returns the appropriate type. """ if pattern.startswith(u'RE:') or u'/' in pattern: return "fullpath" elif pattern.startswith(u'*.'): return "extension" else: return "basename" @staticmethod def is_pattern_valid(pattern): """Returns True if pattern is valid. :param pattern: Normalized pattern. is_pattern_valid() assumes pattern to be normalized. see: globbing.normalize_pattern """ result = True translator = Globster.pattern_info[Globster.identify(pattern)]["translator"] tpattern = '(%s)' % translator(pattern) try: re_obj = lazy_regex.lazy_compile(tpattern, re.UNICODE) re_obj.search("") # force compile except errors.InvalidPattern, e: result = False return result class ExceptionGlobster(object): """A Globster that supports exception patterns. Exceptions are ignore patterns prefixed with '!'. Exception patterns take precedence over regular patterns and cause a matching filename to return None from the match() function. Patterns using a '!!' prefix are highest precedence, and act as regular ignores. '!!' patterns are useful to establish ignores that apply under paths specified by '!' 
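# Illustrative sketch (hedged): Globster.match() returns the original pattern
# that fired (useful for reporting which ignore rule matched), or None.
def _example_globster():
    ignores = Globster([u'*.pyc', u'doc/*.html', u'RE:^tmp/.*'])
    assert ignores.match(u'bzrlib/foo.pyc') == u'*.pyc'       # extension rule
    assert ignores.match(u'doc/index.html') == u'doc/*.html'  # fullpath rule
    assert ignores.match(u'tmp/scratch') == u'RE:^tmp/.*'     # regex rule
    assert ignores.match(u'README') is None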
exception patterns. """ def __init__(self,patterns): ignores = [[], [], []] for p in patterns: if p.startswith(u'!!'): ignores[2].append(p[2:]) elif p.startswith(u'!'): ignores[1].append(p[1:]) else: ignores[0].append(p) self._ignores = [Globster(i) for i in ignores] def match(self, filename): """Searches for a pattern that matches the given filename. :return A matching pattern or None if there is no matching pattern. """ double_neg = self._ignores[2].match(filename) if double_neg: return "!!%s" % double_neg elif self._ignores[1].match(filename): return None else: return self._ignores[0].match(filename) class _OrderedGlobster(Globster): """A Globster that keeps pattern order.""" def __init__(self, patterns): """Constructor. :param patterns: sequence of glob patterns """ # Note: This could be smarter by running like sequences together self._regex_patterns = [] for pat in patterns: pat = normalize_pattern(pat) t = Globster.identify(pat) self._add_patterns([pat], Globster.pattern_info[t]["translator"], Globster.pattern_info[t]["prefix"]) _slashes = lazy_regex.lazy_compile(r'[\\/]+') def normalize_pattern(pattern): """Converts backslashes in path patterns to forward slashes. Doesn't normalize regular expressions - they may contain escapes. """ if not (pattern.startswith('RE:') or pattern.startswith('!RE:')): pattern = _slashes.sub('/', pattern) if len(pattern) > 1: pattern = pattern.rstrip('/') return pattern bzr-2.7.0/bzrlib/gpg.py0000644000000000000000000005054012650125144013062 0ustar 00000000000000# Copyright (C) 2005, 2006, 2007, 2009, 2011, 2012, 2013, 2016 Canonical Ltd # Authors: Robert Collins # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
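# Illustrative sketch (hedged): '!' patterns punch holes in the ignore list,
# and '!!' patterns re-ignore paths underneath such a hole.
def _example_exception_globster():
    ignores = ExceptionGlobster(
        [u'*.log', u'!important.log', u'!!debug/*.log'])
    assert ignores.match(u'build.log') == u'*.log'
    assert ignores.match(u'important.log') is None       # rescued by '!'
    assert ignores.match(u'debug/trace.log') == u'!!debug/*.log'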
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """GPG signing and checking logic.""" from __future__ import absolute_import import os import sys from StringIO import StringIO from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ import errno import subprocess from bzrlib import ( config, errors, trace, ui, ) from bzrlib.i18n import ( gettext, ngettext, ) """) from bzrlib.symbol_versioning import ( deprecated_in, deprecated_method, ) #verification results SIGNATURE_VALID = 0 SIGNATURE_KEY_MISSING = 1 SIGNATURE_NOT_VALID = 2 SIGNATURE_NOT_SIGNED = 3 SIGNATURE_EXPIRED = 4 def bulk_verify_signatures(repository, revids, strategy, process_events_callback=None): """Do verifications on a set of revisions :param repository: repository object :param revids: list of revision ids to verify :param strategy: GPG strategy to use :param process_events_callback: method to call for GUI frontends that want to keep their UI refreshed :return: count dictionary of results of each type, result list for each revision, boolean True if all results are verified successfully """ count = {SIGNATURE_VALID: 0, SIGNATURE_KEY_MISSING: 0, SIGNATURE_NOT_VALID: 0, SIGNATURE_NOT_SIGNED: 0, SIGNATURE_EXPIRED: 0} result = [] all_verifiable = True total = len(revids) pb = ui.ui_factory.nested_progress_bar() try: for i, (rev_id, verification_result, uid) in enumerate( repository.verify_revision_signatures( revids, strategy)): pb.update("verifying signatures", i, total) result.append([rev_id, verification_result, uid]) count[verification_result] += 1 if verification_result != SIGNATURE_VALID: all_verifiable = False if process_events_callback is not None: process_events_callback() finally: pb.finished() return (count, result, all_verifiable) class DisabledGPGStrategy(object): """A GPG Strategy that makes everything fail.""" @staticmethod def verify_signatures_available(): return True def __init__(self, ignored): """Real strategies take a configuration.""" def sign(self, content): raise errors.SigningFailed('Signing is disabled.') def verify(self, content, testament): raise errors.SignatureVerificationFailed('Signature verification is \ disabled.') def set_acceptable_keys(self, command_line_input): pass class LoopbackGPGStrategy(object): """A GPG Strategy that acts like 'cat' - data is just passed through. Used in tests. 
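# Illustrative sketch (hedged): verifying the signatures on a set of
# revisions.  `repository` and `revids` are assumed to come from an open
# branch; LoopbackGPGStrategy (defined later in this module) is the test
# double that accepts everything.
def _example_bulk_verify(repository, revids):
    strategy = LoopbackGPGStrategy(None)
    count, results, all_ok = bulk_verify_signatures(repository, revids,
                                                    strategy)
    # `count` maps each SIGNATURE_* constant to the number of revisions in
    # that state, e.g. count[SIGNATURE_VALID] or count[SIGNATURE_NOT_SIGNED].
    return all_ok, count[SIGNATURE_VALID]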
""" @staticmethod def verify_signatures_available(): return True def __init__(self, ignored): """Real strategies take a configuration.""" def sign(self, content): return ("-----BEGIN PSEUDO-SIGNED CONTENT-----\n" + content + "-----END PSEUDO-SIGNED CONTENT-----\n") def verify(self, content, testament): return SIGNATURE_VALID, None def set_acceptable_keys(self, command_line_input): if command_line_input is not None: patterns = command_line_input.split(",") self.acceptable_keys = [] for pattern in patterns: if pattern == "unknown": pass else: self.acceptable_keys.append(pattern) @deprecated_method(deprecated_in((2, 6, 0))) def do_verifications(self, revisions, repository): return bulk_verify_signatures(repository, revisions, self) @deprecated_method(deprecated_in((2, 6, 0))) def valid_commits_message(self, count): return valid_commits_message(count) @deprecated_method(deprecated_in((2, 6, 0))) def unknown_key_message(self, count): return unknown_key_message(count) @deprecated_method(deprecated_in((2, 6, 0))) def commit_not_valid_message(self, count): return commit_not_valid_message(count) @deprecated_method(deprecated_in((2, 6, 0))) def commit_not_signed_message(self, count): return commit_not_signed_message(count) @deprecated_method(deprecated_in((2, 6, 0))) def expired_commit_message(self, count): return expired_commit_message(count) def _set_gpg_tty(): tty = os.environ.get('TTY') if tty is not None: os.environ['GPG_TTY'] = tty trace.mutter('setting GPG_TTY=%s', tty) else: # This is not quite worthy of a warning, because some people # don't need GPG_TTY to be set. But it is worthy of a big mark # in ~/.bzr.log, so that people can debug it if it happens to them trace.mutter('** Env var TTY empty, cannot set GPG_TTY.' ' Is TTY exported?') class GPGStrategy(object): """GPG Signing and checking facilities.""" acceptable_keys = None def __init__(self, config_stack): self._config_stack = config_stack try: import gpgme self.context = gpgme.Context() except ImportError, error: pass # can't use verify() @staticmethod def verify_signatures_available(): """ check if this strategy can verify signatures :return: boolean if this strategy can verify signatures """ try: import gpgme return True except ImportError, error: return False def _command_line(self): key = self._config_stack.get('gpg_signing_key') if key is None or key == 'default': # 'default' or not setting gpg_signing_key at all means we should # use the user email address key = config.extract_email_address(self._config_stack.get('email')) return [self._config_stack.get('gpg_signing_command'), '--clearsign', '-u', key] def sign(self, content): if isinstance(content, unicode): raise errors.BzrBadParameterUnicode('content') ui.ui_factory.clear_term() preexec_fn = _set_gpg_tty if sys.platform == 'win32': # Win32 doesn't support preexec_fn, but wouldn't support TTY anyway. preexec_fn = None try: process = subprocess.Popen(self._command_line(), stdin=subprocess.PIPE, stdout=subprocess.PIPE, preexec_fn=preexec_fn) try: result = process.communicate(content)[0] if process.returncode is None: process.wait() if process.returncode != 0: raise errors.SigningFailed(self._command_line()) return result except OSError, e: if e.errno == errno.EPIPE: raise errors.SigningFailed(self._command_line()) else: raise except ValueError: # bad subprocess parameters, should never happen. 
raise except OSError, e: if e.errno == errno.ENOENT: # gpg is not installed raise errors.SigningFailed(self._command_line()) else: raise def verify(self, content, testament): """Check content has a valid signature. :param content: the commit signature :param testament: the valid testament string for the commit :return: SIGNATURE_VALID or a failed SIGNATURE_ value, key uid if valid """ try: import gpgme except ImportError, error: raise errors.GpgmeNotInstalled(error) signature = StringIO(content) plain_output = StringIO() try: result = self.context.verify(signature, None, plain_output) except gpgme.GpgmeError,error: raise errors.SignatureVerificationFailed(error[2]) # No result if input is invalid. # test_verify_invalid() if len(result) == 0: return SIGNATURE_NOT_VALID, None # User has specified a list of acceptable keys, check our result is in # it. test_verify_unacceptable_key() fingerprint = result[0].fpr if self.acceptable_keys is not None: if not fingerprint in self.acceptable_keys: return SIGNATURE_KEY_MISSING, fingerprint[-8:] # Check the signature actually matches the testament. # test_verify_bad_testament() if testament != plain_output.getvalue(): return SIGNATURE_NOT_VALID, None # Yay gpgme set the valid bit. # Can't write a test for this one as you can't set a key to be # trusted using gpgme. if result[0].summary & gpgme.SIGSUM_VALID: key = self.context.get_key(fingerprint) name = key.uids[0].name email = key.uids[0].email return SIGNATURE_VALID, name + " <" + email + ">" # Sigsum_red indicates a problem, unfortunatly I have not been able # to write any tests which actually set this. if result[0].summary & gpgme.SIGSUM_RED: return SIGNATURE_NOT_VALID, None # GPG does not know this key. # test_verify_unknown_key() if result[0].summary & gpgme.SIGSUM_KEY_MISSING: return SIGNATURE_KEY_MISSING, fingerprint[-8:] # Summary isn't set if sig is valid but key is untrusted but if user # has explicity set the key as acceptable we can validate it. if result[0].summary == 0 and self.acceptable_keys is not None: if fingerprint in self.acceptable_keys: # test_verify_untrusted_but_accepted() return SIGNATURE_VALID, None # test_verify_valid_but_untrusted() if result[0].summary == 0 and self.acceptable_keys is None: return SIGNATURE_NOT_VALID, None if result[0].summary & gpgme.SIGSUM_KEY_EXPIRED: expires = self.context.get_key(result[0].fpr).subkeys[0].expires if expires > result[0].timestamp: # The expired key was not expired at time of signing. # test_verify_expired_but_valid() return SIGNATURE_EXPIRED, fingerprint[-8:] else: # I can't work out how to create a test where the signature # was expired at the time of signing. return SIGNATURE_NOT_VALID, None # A signature from a revoked key gets this. # test_verify_revoked_signature() if ((result[0].summary & gpgme.SIGSUM_SYS_ERROR or result[0].status.strerror == 'Certificate revoked')): return SIGNATURE_NOT_VALID, None # Other error types such as revoked keys should (I think) be caught by # SIGSUM_RED so anything else means something is buggy. raise errors.SignatureVerificationFailed( "Unknown GnuPG key verification result") def set_acceptable_keys(self, command_line_input): """Set the acceptable keys for verifying with this GPGStrategy. 
:param command_line_input: comma separated list of patterns from command line :return: nothing """ patterns = None acceptable_keys_config = self._config_stack.get('acceptable_keys') if acceptable_keys_config is not None: patterns = acceptable_keys_config if command_line_input is not None: # command line overrides config patterns = command_line_input.split(',') if patterns: self.acceptable_keys = [] for pattern in patterns: result = self.context.keylist(pattern) found_key = False for key in result: found_key = True self.acceptable_keys.append(key.subkeys[0].fpr) trace.mutter("Added acceptable key: " + key.subkeys[0].fpr) if not found_key: trace.note(gettext( "No GnuPG key results for pattern: {0}" ).format(pattern)) @deprecated_method(deprecated_in((2, 6, 0))) def do_verifications(self, revisions, repository, process_events_callback=None): """do verifications on a set of revisions :param revisions: list of revision ids to verify :param repository: repository object :param process_events_callback: method to call for GUI frontends that want to keep their UI refreshed :return: count dictionary of results of each type, result list for each revision, boolean True if all results are verified successfully """ return bulk_verify_signatures(repository, revisions, self, process_events_callback) @deprecated_method(deprecated_in((2, 6, 0))) def verbose_valid_message(self, result): """takes a verify result and returns list of signed commits strings""" return verbose_valid_message(result) @deprecated_method(deprecated_in((2, 6, 0))) def verbose_not_valid_message(self, result, repo): """takes a verify result and returns list of not valid commit info""" return verbose_not_valid_message(result, repo) @deprecated_method(deprecated_in((2, 6, 0))) def verbose_not_signed_message(self, result, repo): """takes a verify result and returns list of not signed commit info""" return verbose_not_valid_message(result, repo) @deprecated_method(deprecated_in((2, 6, 0))) def verbose_missing_key_message(self, result): """takes a verify result and returns list of missing key info""" return verbose_missing_key_message(result) @deprecated_method(deprecated_in((2, 6, 0))) def verbose_expired_key_message(self, result, repo): """takes a verify result and returns list of expired key info""" return verbose_expired_key_message(result, repo) @deprecated_method(deprecated_in((2, 6, 0))) def valid_commits_message(self, count): """returns message for number of commits""" return valid_commits_message(count) @deprecated_method(deprecated_in((2, 6, 0))) def unknown_key_message(self, count): """returns message for number of commits""" return unknown_key_message(count) @deprecated_method(deprecated_in((2, 6, 0))) def commit_not_valid_message(self, count): """returns message for number of commits""" return commit_not_valid_message(count) @deprecated_method(deprecated_in((2, 6, 0))) def commit_not_signed_message(self, count): """returns message for number of commits""" return commit_not_signed_message(count) @deprecated_method(deprecated_in((2, 6, 0))) def expired_commit_message(self, count): """returns message for number of commits""" return expired_commit_message(count) def valid_commits_message(count): """returns message for number of commits""" return gettext(u"{0} commits with valid signatures").format( count[SIGNATURE_VALID]) def unknown_key_message(count): """returns message for number of commits""" return ngettext(u"{0} commit with unknown key", u"{0} commits with unknown keys", count[SIGNATURE_KEY_MISSING]).format( 
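# Illustrative sketch (hedged): the reporting helpers in this module all take
# the count dictionary produced by bulk_verify_signatures(), keyed by the
# SIGNATURE_* constants defined near the top of the file.  The expected
# English strings are shown in the comments.
def _example_signature_summary():
    count = {SIGNATURE_VALID: 3, SIGNATURE_KEY_MISSING: 1,
             SIGNATURE_NOT_VALID: 0, SIGNATURE_NOT_SIGNED: 2,
             SIGNATURE_EXPIRED: 0}
    return [valid_commits_message(count),     # "3 commits with valid signatures"
            unknown_key_message(count),       # "1 commit with unknown key"
            commit_not_signed_message(count)] # "2 commits not signed"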
count[SIGNATURE_KEY_MISSING]) def commit_not_valid_message(count): """returns message for number of commits""" return ngettext(u"{0} commit not valid", u"{0} commits not valid", count[SIGNATURE_NOT_VALID]).format( count[SIGNATURE_NOT_VALID]) def commit_not_signed_message(count): """returns message for number of commits""" return ngettext(u"{0} commit not signed", u"{0} commits not signed", count[SIGNATURE_NOT_SIGNED]).format( count[SIGNATURE_NOT_SIGNED]) def expired_commit_message(count): """returns message for number of commits""" return ngettext(u"{0} commit with key now expired", u"{0} commits with key now expired", count[SIGNATURE_EXPIRED]).format( count[SIGNATURE_EXPIRED]) def verbose_expired_key_message(result, repo): """takes a verify result and returns list of expired key info""" signers = {} fingerprint_to_authors = {} for rev_id, validity, fingerprint in result: if validity == SIGNATURE_EXPIRED: revision = repo.get_revision(rev_id) authors = ', '.join(revision.get_apparent_authors()) signers.setdefault(fingerprint, 0) signers[fingerprint] += 1 fingerprint_to_authors[fingerprint] = authors result = [] for fingerprint, number in signers.items(): result.append( ngettext(u"{0} commit by author {1} with key {2} now expired", u"{0} commits by author {1} with key {2} now expired", number).format( number, fingerprint_to_authors[fingerprint], fingerprint)) return result def verbose_valid_message(result): """takes a verify result and returns list of signed commits strings""" signers = {} for rev_id, validity, uid in result: if validity == SIGNATURE_VALID: signers.setdefault(uid, 0) signers[uid] += 1 result = [] for uid, number in signers.items(): result.append(ngettext(u"{0} signed {1} commit", u"{0} signed {1} commits", number).format(uid, number)) return result def verbose_not_valid_message(result, repo): """takes a verify result and returns list of not valid commit info""" signers = {} for rev_id, validity, empty in result: if validity == SIGNATURE_NOT_VALID: revision = repo.get_revision(rev_id) authors = ', '.join(revision.get_apparent_authors()) signers.setdefault(authors, 0) signers[authors] += 1 result = [] for authors, number in signers.items(): result.append(ngettext(u"{0} commit by author {1}", u"{0} commits by author {1}", number).format(number, authors)) return result def verbose_not_signed_message(result, repo): """takes a verify result and returns list of not signed commit info""" signers = {} for rev_id, validity, empty in result: if validity == SIGNATURE_NOT_SIGNED: revision = repo.get_revision(rev_id) authors = ', '.join(revision.get_apparent_authors()) signers.setdefault(authors, 0) signers[authors] += 1 result = [] for authors, number in signers.items(): result.append(ngettext(u"{0} commit by author {1}", u"{0} commits by author {1}", number).format(number, authors)) return result def verbose_missing_key_message(result): """takes a verify result and returns list of missing key info""" signers = {} for rev_id, validity, fingerprint in result: if validity == SIGNATURE_KEY_MISSING: signers.setdefault(fingerprint, 0) signers[fingerprint] += 1 result = [] for fingerprint, number in signers.items(): result.append(ngettext(u"Unknown key {0} signed {1} commit", u"Unknown key {0} signed {1} commits", number).format(fingerprint, number)) return result bzr-2.7.0/bzrlib/graph.py0000644000000000000000000021753711673403246013427 0ustar 00000000000000# Copyright (C) 2007-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of 
the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import import time from bzrlib import ( debug, errors, osutils, revision, trace, ) STEP_UNIQUE_SEARCHER_EVERY = 5 # DIAGRAM of terminology # A # /\ # B C # | |\ # D E F # |\/| | # |/\|/ # G H # # In this diagram, relative to G and H: # A, B, C, D, E are common ancestors. # C, D and E are border ancestors, because each has a non-common descendant. # D and E are least common ancestors because none of their descendants are # common ancestors. # C is not a least common ancestor because its descendant, E, is a common # ancestor. # # The find_unique_lca algorithm will pick A in two steps: # 1. find_lca('G', 'H') => ['D', 'E'] # 2. Since len(['D', 'E']) > 1, find_lca('D', 'E') => ['A'] class DictParentsProvider(object): """A parents provider for Graph objects.""" def __init__(self, ancestry): self.ancestry = ancestry def __repr__(self): return 'DictParentsProvider(%r)' % self.ancestry # Note: DictParentsProvider does not implement get_cached_parent_map # Arguably, the data is clearly cached in memory. However, this class # is mostly used for testing, and it keeps the tests clean to not # change it. def get_parent_map(self, keys): """See StackedParentsProvider.get_parent_map""" ancestry = self.ancestry return dict([(k, ancestry[k]) for k in keys if k in ancestry]) class StackedParentsProvider(object): """A parents provider which stacks (or unions) multiple providers. The providers are queries in the order of the provided parent_providers. """ def __init__(self, parent_providers): self._parent_providers = parent_providers def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self._parent_providers) def get_parent_map(self, keys): """Get a mapping of keys => parents A dictionary is returned with an entry for each key present in this source. If this source doesn't have information about a key, it should not include an entry. [NULL_REVISION] is used as the parent of the first user-committed revision. Its parent list is empty. :param keys: An iterable returning keys to check (eg revision_ids) :return: A dictionary mapping each key to its parents """ found = {} remaining = set(keys) # This adds getattr() overhead to each get_parent_map call. However, # this is StackedParentsProvider, which means we're dealing with I/O # (either local indexes, or remote RPCs), so CPU overhead should be # minimal. 
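# Illustrative sketch (hedged): DictParentsProvider serves parent information
# straight from a dict, which makes it convenient for small in-memory graphs
# such as the terminology diagram above (G's parents are D and E; H's parents
# are D, E and F).
def _example_dict_parents_provider():
    ancestry = {'A': (), 'B': ('A',), 'C': ('A',), 'D': ('B',),
                'E': ('C',), 'F': ('C',),
                'G': ('D', 'E'), 'H': ('D', 'E', 'F')}
    provider = DictParentsProvider(ancestry)
    # Only keys present in the dict appear in the result.
    return provider.get_parent_map(['G', 'H', 'not-there'])
    # -> {'G': ('D', 'E'), 'H': ('D', 'E', 'F')}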
for parents_provider in self._parent_providers: get_cached = getattr(parents_provider, 'get_cached_parent_map', None) if get_cached is None: continue new_found = get_cached(remaining) found.update(new_found) remaining.difference_update(new_found) if not remaining: break if not remaining: return found for parents_provider in self._parent_providers: new_found = parents_provider.get_parent_map(remaining) found.update(new_found) remaining.difference_update(new_found) if not remaining: break return found class CachingParentsProvider(object): """A parents provider which will cache the revision => parents as a dict. This is useful for providers which have an expensive look up. Either a ParentsProvider or a get_parent_map-like callback may be supplied. If it provides extra un-asked-for parents, they will be cached, but filtered out of get_parent_map. The cache is enabled by default, but may be disabled and re-enabled. """ def __init__(self, parent_provider=None, get_parent_map=None): """Constructor. :param parent_provider: The ParentProvider to use. It or get_parent_map must be supplied. :param get_parent_map: The get_parent_map callback to use. It or parent_provider must be supplied. """ self._real_provider = parent_provider if get_parent_map is None: self._get_parent_map = self._real_provider.get_parent_map else: self._get_parent_map = get_parent_map self._cache = None self.enable_cache(True) def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self._real_provider) def enable_cache(self, cache_misses=True): """Enable cache.""" if self._cache is not None: raise AssertionError('Cache enabled when already enabled.') self._cache = {} self._cache_misses = cache_misses self.missing_keys = set() def disable_cache(self): """Disable and clear the cache.""" self._cache = None self._cache_misses = None self.missing_keys = set() def get_cached_map(self): """Return any cached get_parent_map values.""" if self._cache is None: return None return dict(self._cache) def get_cached_parent_map(self, keys): """Return items from the cache. This returns the same info as get_parent_map, but explicitly does not invoke the supplied ParentsProvider to search for uncached values. """ cache = self._cache if cache is None: return {} return dict([(key, cache[key]) for key in keys if key in cache]) def get_parent_map(self, keys): """See StackedParentsProvider.get_parent_map.""" cache = self._cache if cache is None: cache = self._get_parent_map(keys) else: needed_revisions = set(key for key in keys if key not in cache) # Do not ask for negatively cached keys needed_revisions.difference_update(self.missing_keys) if needed_revisions: parent_map = self._get_parent_map(needed_revisions) cache.update(parent_map) if self._cache_misses: for key in needed_revisions: if key not in parent_map: self.note_missing_key(key) result = {} for key in keys: value = cache.get(key) if value is not None: result[key] = value return result def note_missing_key(self, key): """Note that key is a missing key.""" if self._cache_misses: self.missing_keys.add(key) class CallableToParentsProviderAdapter(object): """A parents provider that adapts any callable to the parents provider API. i.e. it accepts calls to self.get_parent_map and relays them to the callable it was constructed with. 
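# Illustrative sketch (hedged): CachingParentsProvider remembers earlier
# answers (and, optionally, misses) so an expensive provider is only queried
# once per key.
def _example_caching_provider():
    calls = []
    def expensive_get_parent_map(keys):
        calls.append(sorted(keys))
        data = {'A': (), 'B': ('A',)}
        return dict((k, data[k]) for k in keys if k in data)
    provider = CachingParentsProvider(get_parent_map=expensive_get_parent_map)
    provider.get_parent_map(['B', 'ghost'])
    provider.get_parent_map(['B', 'ghost'])   # served from cache + miss cache
    return calls                              # only one underlying call made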
""" def __init__(self, a_callable): self.callable = a_callable def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self.callable) def get_parent_map(self, keys): return self.callable(keys) class Graph(object): """Provide incremental access to revision graphs. This is the generic implementation; it is intended to be subclassed to specialize it for other repository types. """ def __init__(self, parents_provider): """Construct a Graph that uses several graphs as its input This should not normally be invoked directly, because there may be specialized implementations for particular repository types. See Repository.get_graph(). :param parents_provider: An object providing a get_parent_map call conforming to the behavior of StackedParentsProvider.get_parent_map. """ if getattr(parents_provider, 'get_parents', None) is not None: self.get_parents = parents_provider.get_parents if getattr(parents_provider, 'get_parent_map', None) is not None: self.get_parent_map = parents_provider.get_parent_map self._parents_provider = parents_provider def __repr__(self): return 'Graph(%r)' % self._parents_provider def find_lca(self, *revisions): """Determine the lowest common ancestors of the provided revisions A lowest common ancestor is a common ancestor none of whose descendants are common ancestors. In graphs, unlike trees, there may be multiple lowest common ancestors. This algorithm has two phases. Phase 1 identifies border ancestors, and phase 2 filters border ancestors to determine lowest common ancestors. In phase 1, border ancestors are identified, using a breadth-first search starting at the bottom of the graph. Searches are stopped whenever a node or one of its descendants is determined to be common In phase 2, the border ancestors are filtered to find the least common ancestors. This is done by searching the ancestries of each border ancestor. Phase 2 is perfomed on the principle that a border ancestor that is not an ancestor of any other border ancestor is a least common ancestor. Searches are stopped when they find a node that is determined to be a common ancestor of all border ancestors, because this shows that it cannot be a descendant of any border ancestor. The scaling of this operation should be proportional to: 1. The number of uncommon ancestors 2. The number of border ancestors 3. The length of the shortest path between a border ancestor and an ancestor of all border ancestors. """ border_common, common, sides = self._find_border_ancestors(revisions) # We may have common ancestors that can be reached from each other. # - ask for the heads of them to filter it down to only ones that # cannot be reached from each other - phase 2. 
return self.heads(border_common) def find_difference(self, left_revision, right_revision): """Determine the graph difference between two revisions""" border, common, searchers = self._find_border_ancestors( [left_revision, right_revision]) self._search_for_extra_common(common, searchers) left = searchers[0].seen right = searchers[1].seen return (left.difference(right), right.difference(left)) def find_descendants(self, old_key, new_key): """Find descendants of old_key that are ancestors of new_key.""" child_map = self.get_child_map(self._find_descendant_ancestors( old_key, new_key)) graph = Graph(DictParentsProvider(child_map)) searcher = graph._make_breadth_first_searcher([old_key]) list(searcher) return searcher.seen def _find_descendant_ancestors(self, old_key, new_key): """Find ancestors of new_key that may be descendants of old_key.""" stop = self._make_breadth_first_searcher([old_key]) descendants = self._make_breadth_first_searcher([new_key]) for revisions in descendants: old_stop = stop.seen.intersection(revisions) descendants.stop_searching_any(old_stop) seen_stop = descendants.find_seen_ancestors(stop.step()) descendants.stop_searching_any(seen_stop) return descendants.seen.difference(stop.seen) def get_child_map(self, keys): """Get a mapping from parents to children of the specified keys. This is simply the inversion of get_parent_map. Only supplied keys will be discovered as children. :return: a dict of key:child_list for keys. """ parent_map = self._parents_provider.get_parent_map(keys) parent_child = {} for child, parents in sorted(parent_map.items()): for parent in parents: parent_child.setdefault(parent, []).append(child) return parent_child def find_distance_to_null(self, target_revision_id, known_revision_ids): """Find the left-hand distance to the NULL_REVISION. (This can also be considered the revno of a branch at target_revision_id.) :param target_revision_id: A revision_id which we would like to know the revno for. :param known_revision_ids: [(revision_id, revno)] A list of known revno, revision_id tuples. We'll use this to seed the search. """ # Map from revision_ids to a known value for their revno known_revnos = dict(known_revision_ids) cur_tip = target_revision_id num_steps = 0 NULL_REVISION = revision.NULL_REVISION known_revnos[NULL_REVISION] = 0 searching_known_tips = list(known_revnos.keys()) unknown_searched = {} while cur_tip not in known_revnos: unknown_searched[cur_tip] = num_steps num_steps += 1 to_search = set([cur_tip]) to_search.update(searching_known_tips) parent_map = self.get_parent_map(to_search) parents = parent_map.get(cur_tip, None) if not parents: # An empty list or None is a ghost raise errors.GhostRevisionsHaveNoRevno(target_revision_id, cur_tip) cur_tip = parents[0] next_known_tips = [] for revision_id in searching_known_tips: parents = parent_map.get(revision_id, None) if not parents: continue next = parents[0] next_revno = known_revnos[revision_id] - 1 if next in unknown_searched: # We have enough information to return a value right now return next_revno + unknown_searched[next] if next in known_revnos: continue known_revnos[next] = next_revno next_known_tips.append(next) searching_known_tips = next_known_tips # We reached a known revision, so just add in how many steps it took to # get there. return known_revnos[cur_tip] + num_steps def find_lefthand_distances(self, keys): """Find the distance to null for all the keys in keys. :param keys: keys to lookup. :return: A dict key->distance for all of keys. 
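        For example (a sketch; revision ids are illustrative only), with a
        history null: <- 'rev1' <- 'rev2', find_lefthand_distances(['rev2',
        'rev1', 'ghost']) would return {'rev2': 2, 'rev1': 1, 'ghost': -1};
        ghosts are reported with a distance of -1.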
""" # Optimisable by concurrent searching, but a random spread should get # some sort of hit rate. result = {} known_revnos = [] ghosts = [] for key in keys: try: known_revnos.append( (key, self.find_distance_to_null(key, known_revnos))) except errors.GhostRevisionsHaveNoRevno: ghosts.append(key) for key in ghosts: known_revnos.append((key, -1)) return dict(known_revnos) def find_unique_ancestors(self, unique_revision, common_revisions): """Find the unique ancestors for a revision versus others. This returns the ancestry of unique_revision, excluding all revisions in the ancestry of common_revisions. If unique_revision is in the ancestry, then the empty set will be returned. :param unique_revision: The revision_id whose ancestry we are interested in. (XXX: Would this API be better if we allowed multiple revisions on to be searched here?) :param common_revisions: Revision_ids of ancestries to exclude. :return: A set of revisions in the ancestry of unique_revision """ if unique_revision in common_revisions: return set() # Algorithm description # 1) Walk backwards from the unique node and all common nodes. # 2) When a node is seen by both sides, stop searching it in the unique # walker, include it in the common walker. # 3) Stop searching when there are no nodes left for the unique walker. # At this point, you have a maximal set of unique nodes. Some of # them may actually be common, and you haven't reached them yet. # 4) Start new searchers for the unique nodes, seeded with the # information you have so far. # 5) Continue searching, stopping the common searches when the search # tip is an ancestor of all unique nodes. # 6) Aggregate together unique searchers when they are searching the # same tips. When all unique searchers are searching the same node, # stop move it to a single 'all_unique_searcher'. # 7) The 'all_unique_searcher' represents the very 'tip' of searching. # Most of the time this produces very little important information. # So don't step it as quickly as the other searchers. # 8) Search is done when all common searchers have completed. unique_searcher, common_searcher = self._find_initial_unique_nodes( [unique_revision], common_revisions) unique_nodes = unique_searcher.seen.difference(common_searcher.seen) if not unique_nodes: return unique_nodes (all_unique_searcher, unique_tip_searchers) = self._make_unique_searchers(unique_nodes, unique_searcher, common_searcher) self._refine_unique_nodes(unique_searcher, all_unique_searcher, unique_tip_searchers, common_searcher) true_unique_nodes = unique_nodes.difference(common_searcher.seen) if 'graph' in debug.debug_flags: trace.mutter('Found %d truly unique nodes out of %d', len(true_unique_nodes), len(unique_nodes)) return true_unique_nodes def _find_initial_unique_nodes(self, unique_revisions, common_revisions): """Steps 1-3 of find_unique_ancestors. Find the maximal set of unique nodes. Some of these might actually still be common, but we are sure that there are no other unique nodes. :return: (unique_searcher, common_searcher) """ unique_searcher = self._make_breadth_first_searcher(unique_revisions) # we know that unique_revisions aren't in common_revisions, so skip # past them. 
unique_searcher.next() common_searcher = self._make_breadth_first_searcher(common_revisions) # As long as we are still finding unique nodes, keep searching while unique_searcher._next_query: next_unique_nodes = set(unique_searcher.step()) next_common_nodes = set(common_searcher.step()) # Check if either searcher encounters new nodes seen by the other # side. unique_are_common_nodes = next_unique_nodes.intersection( common_searcher.seen) unique_are_common_nodes.update( next_common_nodes.intersection(unique_searcher.seen)) if unique_are_common_nodes: ancestors = unique_searcher.find_seen_ancestors( unique_are_common_nodes) # TODO: This is a bit overboard, we only really care about # the ancestors of the tips because the rest we # already know. This is *correct* but causes us to # search too much ancestry. ancestors.update(common_searcher.find_seen_ancestors(ancestors)) unique_searcher.stop_searching_any(ancestors) common_searcher.start_searching(ancestors) return unique_searcher, common_searcher def _make_unique_searchers(self, unique_nodes, unique_searcher, common_searcher): """Create a searcher for all the unique search tips (step 4). As a side effect, the common_searcher will stop searching any nodes that are ancestors of the unique searcher tips. :return: (all_unique_searcher, unique_tip_searchers) """ unique_tips = self._remove_simple_descendants(unique_nodes, self.get_parent_map(unique_nodes)) if len(unique_tips) == 1: unique_tip_searchers = [] ancestor_all_unique = unique_searcher.find_seen_ancestors(unique_tips) else: unique_tip_searchers = [] for tip in unique_tips: revs_to_search = unique_searcher.find_seen_ancestors([tip]) revs_to_search.update( common_searcher.find_seen_ancestors(revs_to_search)) searcher = self._make_breadth_first_searcher(revs_to_search) # We don't care about the starting nodes. 
searcher._label = tip searcher.step() unique_tip_searchers.append(searcher) ancestor_all_unique = None for searcher in unique_tip_searchers: if ancestor_all_unique is None: ancestor_all_unique = set(searcher.seen) else: ancestor_all_unique = ancestor_all_unique.intersection( searcher.seen) # Collapse all the common nodes into a single searcher all_unique_searcher = self._make_breadth_first_searcher( ancestor_all_unique) if ancestor_all_unique: # We've seen these nodes in all the searchers, so we'll just go to # the next all_unique_searcher.step() # Stop any search tips that are already known as ancestors of the # unique nodes stopped_common = common_searcher.stop_searching_any( common_searcher.find_seen_ancestors(ancestor_all_unique)) total_stopped = 0 for searcher in unique_tip_searchers: total_stopped += len(searcher.stop_searching_any( searcher.find_seen_ancestors(ancestor_all_unique))) if 'graph' in debug.debug_flags: trace.mutter('For %d unique nodes, created %d + 1 unique searchers' ' (%d stopped search tips, %d common ancestors' ' (%d stopped common)', len(unique_nodes), len(unique_tip_searchers), total_stopped, len(ancestor_all_unique), len(stopped_common)) return all_unique_searcher, unique_tip_searchers def _step_unique_and_common_searchers(self, common_searcher, unique_tip_searchers, unique_searcher): """Step all the searchers""" newly_seen_common = set(common_searcher.step()) newly_seen_unique = set() for searcher in unique_tip_searchers: next = set(searcher.step()) next.update(unique_searcher.find_seen_ancestors(next)) next.update(common_searcher.find_seen_ancestors(next)) for alt_searcher in unique_tip_searchers: if alt_searcher is searcher: continue next.update(alt_searcher.find_seen_ancestors(next)) searcher.start_searching(next) newly_seen_unique.update(next) return newly_seen_common, newly_seen_unique def _find_nodes_common_to_all_unique(self, unique_tip_searchers, all_unique_searcher, newly_seen_unique, step_all_unique): """Find nodes that are common to all unique_tip_searchers. If it is time, step the all_unique_searcher, and add its nodes to the result. """ common_to_all_unique_nodes = newly_seen_unique.copy() for searcher in unique_tip_searchers: common_to_all_unique_nodes.intersection_update(searcher.seen) common_to_all_unique_nodes.intersection_update( all_unique_searcher.seen) # Step all-unique less frequently than the other searchers. # In the common case, we don't need to spider out far here, so # avoid doing extra work. if step_all_unique: tstart = time.clock() nodes = all_unique_searcher.step() common_to_all_unique_nodes.update(nodes) if 'graph' in debug.debug_flags: tdelta = time.clock() - tstart trace.mutter('all_unique_searcher step() took %.3fs' 'for %d nodes (%d total), iteration: %s', tdelta, len(nodes), len(all_unique_searcher.seen), all_unique_searcher._iterations) return common_to_all_unique_nodes def _collapse_unique_searchers(self, unique_tip_searchers, common_to_all_unique_nodes): """Combine searchers that are searching the same tips. When two searchers are searching the same tips, we can stop one of the searchers. We also know that the maximal set of common ancestors is the intersection of the two original searchers. :return: A list of searchers that are searching unique nodes. """ # Filter out searchers that don't actually search different # nodes. 
We already have the ancestry intersection for them unique_search_tips = {} for searcher in unique_tip_searchers: stopped = searcher.stop_searching_any(common_to_all_unique_nodes) will_search_set = frozenset(searcher._next_query) if not will_search_set: if 'graph' in debug.debug_flags: trace.mutter('Unique searcher %s was stopped.' ' (%s iterations) %d nodes stopped', searcher._label, searcher._iterations, len(stopped)) elif will_search_set not in unique_search_tips: # This searcher is searching a unique set of nodes, let it unique_search_tips[will_search_set] = [searcher] else: unique_search_tips[will_search_set].append(searcher) # TODO: it might be possible to collapse searchers faster when they # only have *some* search tips in common. next_unique_searchers = [] for searchers in unique_search_tips.itervalues(): if len(searchers) == 1: # Searching unique tips, go for it next_unique_searchers.append(searchers[0]) else: # These searchers have started searching the same tips, we # don't need them to cover the same ground. The # intersection of their ancestry won't change, so create a # new searcher, combining their histories. next_searcher = searchers[0] for searcher in searchers[1:]: next_searcher.seen.intersection_update(searcher.seen) if 'graph' in debug.debug_flags: trace.mutter('Combining %d searchers into a single' ' searcher searching %d nodes with' ' %d ancestry', len(searchers), len(next_searcher._next_query), len(next_searcher.seen)) next_unique_searchers.append(next_searcher) return next_unique_searchers def _refine_unique_nodes(self, unique_searcher, all_unique_searcher, unique_tip_searchers, common_searcher): """Steps 5-8 of find_unique_ancestors. This function returns when common_searcher has stopped searching for more nodes. """ # We step the ancestor_all_unique searcher only every # STEP_UNIQUE_SEARCHER_EVERY steps. step_all_unique_counter = 0 # While we still have common nodes to search while common_searcher._next_query: (newly_seen_common, newly_seen_unique) = self._step_unique_and_common_searchers( common_searcher, unique_tip_searchers, unique_searcher) # These nodes are common ancestors of all unique nodes common_to_all_unique_nodes = self._find_nodes_common_to_all_unique( unique_tip_searchers, all_unique_searcher, newly_seen_unique, step_all_unique_counter==0) step_all_unique_counter = ((step_all_unique_counter + 1) % STEP_UNIQUE_SEARCHER_EVERY) if newly_seen_common: # If a 'common' node is an ancestor of all unique searchers, we # can stop searching it. common_searcher.stop_searching_any( all_unique_searcher.seen.intersection(newly_seen_common)) if common_to_all_unique_nodes: common_to_all_unique_nodes.update( common_searcher.find_seen_ancestors( common_to_all_unique_nodes)) # The all_unique searcher can start searching the common nodes # but everyone else can stop. # This is the sort of thing where we would like to not have it # start_searching all of the nodes, but only mark all of them # as seen, and have it search only the actual tips. Otherwise # it is another get_parent_map() traversal for it to figure out # what we already should know. 
all_unique_searcher.start_searching(common_to_all_unique_nodes) common_searcher.stop_searching_any(common_to_all_unique_nodes) next_unique_searchers = self._collapse_unique_searchers( unique_tip_searchers, common_to_all_unique_nodes) if len(unique_tip_searchers) != len(next_unique_searchers): if 'graph' in debug.debug_flags: trace.mutter('Collapsed %d unique searchers => %d' ' at %s iterations', len(unique_tip_searchers), len(next_unique_searchers), all_unique_searcher._iterations) unique_tip_searchers = next_unique_searchers def get_parent_map(self, revisions): """Get a map of key:parent_list for revisions. This implementation delegates to get_parents, for old parent_providers that do not supply get_parent_map. """ result = {} for rev, parents in self.get_parents(revisions): if parents is not None: result[rev] = parents return result def _make_breadth_first_searcher(self, revisions): return _BreadthFirstSearcher(revisions, self) def _find_border_ancestors(self, revisions): """Find common ancestors with at least one uncommon descendant. Border ancestors are identified using a breadth-first search starting at the bottom of the graph. Searches are stopped whenever a node or one of its descendants is determined to be common. This will scale with the number of uncommon ancestors. As well as the border ancestors, a set of seen common ancestors and a list of sets of seen ancestors for each input revision is returned. This allows calculation of graph difference from the results of this operation. """ if None in revisions: raise errors.InvalidRevisionId(None, self) common_ancestors = set() searchers = [self._make_breadth_first_searcher([r]) for r in revisions] active_searchers = searchers[:] border_ancestors = set() while True: newly_seen = set() for searcher in searchers: new_ancestors = searcher.step() if new_ancestors: newly_seen.update(new_ancestors) new_common = set() for revision in newly_seen: if revision in common_ancestors: # Not a border ancestor because it was seen as common # already new_common.add(revision) continue for searcher in searchers: if revision not in searcher.seen: break else: # This is a border because it is a first common that we see # after walking for a while. border_ancestors.add(revision) new_common.add(revision) if new_common: for searcher in searchers: new_common.update(searcher.find_seen_ancestors(new_common)) for searcher in searchers: searcher.start_searching(new_common) common_ancestors.update(new_common) # Figure out what the searchers will be searching next, and if # there is only 1 set being searched, then we are done searching, # since all searchers would have to be searching the same data, # thus it *must* be in common. unique_search_sets = set() for searcher in searchers: will_search_set = frozenset(searcher._next_query) if will_search_set not in unique_search_sets: # This searcher is searching a unique set of nodes, let it unique_search_sets.add(will_search_set) if len(unique_search_sets) == 1: nodes = unique_search_sets.pop() uncommon_nodes = nodes.difference(common_ancestors) if uncommon_nodes: raise AssertionError("Somehow we ended up converging" " without actually marking them as" " in common." "\nStart_nodes: %s" "\nuncommon_nodes: %s" % (revisions, uncommon_nodes)) break return border_ancestors, common_ancestors, searchers def heads(self, keys): """Return the heads from amongst keys. This is done by searching the ancestries of each key. Any key that is reachable from another key is not returned; all the others are. 
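        For example (a sketch; revision ids are illustrative only), with
        parents {'rev1': (), 'rev2': ('rev1',), 'rev2b': ('rev1',),
        'rev3': ('rev2',)}: heads(['rev3', 'rev1']) is set(['rev3']) because
        'rev1' is an ancestor of 'rev3', while heads(['rev3', 'rev2b']) is
        set(['rev3', 'rev2b']) because neither revision is an ancestor of
        the other.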
This operation scales with the relative depth between any two keys. If any two keys are completely disconnected all ancestry of both sides will be retrieved. :param keys: An iterable of keys. :return: A set of the heads. Note that as a set there is no ordering information. Callers will need to filter their input to create order if they need it. """ candidate_heads = set(keys) if revision.NULL_REVISION in candidate_heads: # NULL_REVISION is only a head if it is the only entry candidate_heads.remove(revision.NULL_REVISION) if not candidate_heads: return set([revision.NULL_REVISION]) if len(candidate_heads) < 2: return candidate_heads searchers = dict((c, self._make_breadth_first_searcher([c])) for c in candidate_heads) active_searchers = dict(searchers) # skip over the actual candidate for each searcher for searcher in active_searchers.itervalues(): searcher.next() # The common walker finds nodes that are common to two or more of the # input keys, so that we don't access all history when a currently # uncommon search point actually meets up with something behind a # common search point. Common search points do not keep searches # active; they just allow us to make searches inactive without # accessing all history. common_walker = self._make_breadth_first_searcher([]) while len(active_searchers) > 0: ancestors = set() # advance searches try: common_walker.next() except StopIteration: # No common points being searched at this time. pass for candidate in active_searchers.keys(): try: searcher = active_searchers[candidate] except KeyError: # rare case: we deleted candidate in a previous iteration # through this for loop, because it was determined to be # a descendant of another candidate. continue try: ancestors.update(searcher.next()) except StopIteration: del active_searchers[candidate] continue # process found nodes new_common = set() for ancestor in ancestors: if ancestor in candidate_heads: candidate_heads.remove(ancestor) del searchers[ancestor] if ancestor in active_searchers: del active_searchers[ancestor] # it may meet up with a known common node if ancestor in common_walker.seen: # some searcher has encountered our known common nodes: # just stop it ancestor_set = set([ancestor]) for searcher in searchers.itervalues(): searcher.stop_searching_any(ancestor_set) else: # or it may have been just reached by all the searchers: for searcher in searchers.itervalues(): if ancestor not in searcher.seen: break else: # The final active searcher has just reached this node, # making it be known as a descendant of all candidates, # so we can stop searching it, and any seen ancestors new_common.add(ancestor) for searcher in searchers.itervalues(): seen_ancestors =\ searcher.find_seen_ancestors([ancestor]) searcher.stop_searching_any(seen_ancestors) common_walker.start_searching(new_common) return candidate_heads def find_merge_order(self, tip_revision_id, lca_revision_ids): """Find the order that each revision was merged into tip. This basically just walks backwards with a stack, and walks left-first until it finds a node to stop. """ if len(lca_revision_ids) == 1: return list(lca_revision_ids) looking_for = set(lca_revision_ids) # TODO: Is there a way we could do this "faster" by batching up the # get_parent_map requests? # TODO: Should we also be culling the ancestry search right away? We # could add looking_for to the "stop" list, and walk their # ancestry in batched mode. The flip side is it might mean we walk a # lot of "stop" nodes, rather than only the minimum. 
# Then again, without it we may trace back into ancestry we could have # stopped early. stack = [tip_revision_id] found = [] stop = set() while stack and looking_for: next = stack.pop() stop.add(next) if next in looking_for: found.append(next) looking_for.remove(next) if len(looking_for) == 1: found.append(looking_for.pop()) break continue parent_ids = self.get_parent_map([next]).get(next, None) if not parent_ids: # Ghost, nothing to search here continue for parent_id in reversed(parent_ids): # TODO: (performance) We see the parent at this point, but we # wait to mark it until later to make sure we get left # parents before right parents. However, instead of # waiting until we have traversed enough parents, we # could instead note that we've found it, and once all # parents are in the stack, just reverse iterate the # stack for them. if parent_id not in stop: # this will need to be searched stack.append(parent_id) stop.add(parent_id) return found def find_lefthand_merger(self, merged_key, tip_key): """Find the first lefthand ancestor of tip_key that merged merged_key. We do this by first finding the descendants of merged_key, then walking through the lefthand ancestry of tip_key until we find a key that doesn't descend from merged_key. Its child is the key that merged merged_key. :return: The first lefthand ancestor of tip_key to merge merged_key. merged_key if it is a lefthand ancestor of tip_key. None if no ancestor of tip_key merged merged_key. """ descendants = self.find_descendants(merged_key, tip_key) candidate_iterator = self.iter_lefthand_ancestry(tip_key) last_candidate = None for candidate in candidate_iterator: if candidate not in descendants: return last_candidate last_candidate = candidate def find_unique_lca(self, left_revision, right_revision, count_steps=False): """Find a unique LCA. Find lowest common ancestors. If there is no unique common ancestor, find the lowest common ancestors of those ancestors. Iteration stops when a unique lowest common ancestor is found. The graph origin is necessarily a unique lowest common ancestor. Note that None is not an acceptable substitute for NULL_REVISION. in the input for this method. :param count_steps: If True, the return value will be a tuple of (unique_lca, steps) where steps is the number of times that find_lca was run. If False, only unique_lca is returned. """ revisions = [left_revision, right_revision] steps = 0 while True: steps += 1 lca = self.find_lca(*revisions) if len(lca) == 1: result = lca.pop() if count_steps: return result, steps else: return result if len(lca) == 0: raise errors.NoCommonAncestor(left_revision, right_revision) revisions = lca def iter_ancestry(self, revision_ids): """Iterate the ancestry of this revision. :param revision_ids: Nodes to start the search :return: Yield tuples mapping a revision_id to its parents for the ancestry of revision_id. Ghosts will be returned with None as their parents, and nodes with no parents will have NULL_REVISION as their only parent. (As defined by get_parent_map.) 
There will also be a node for (NULL_REVISION, ()) """ pending = set(revision_ids) processed = set() while pending: processed.update(pending) next_map = self.get_parent_map(pending) next_pending = set() for item in next_map.iteritems(): yield item next_pending.update(p for p in item[1] if p not in processed) ghosts = pending.difference(next_map) for ghost in ghosts: yield (ghost, None) pending = next_pending def iter_lefthand_ancestry(self, start_key, stop_keys=None): if stop_keys is None: stop_keys = () next_key = start_key def get_parents(key): try: return self._parents_provider.get_parent_map([key])[key] except KeyError: raise errors.RevisionNotPresent(next_key, self) while True: if next_key in stop_keys: return parents = get_parents(next_key) yield next_key if len(parents) == 0: return else: next_key = parents[0] def iter_topo_order(self, revisions): """Iterate through the input revisions in topological order. This sorting only ensures that parents come before their children. An ancestor may sort after a descendant if the relationship is not visible in the supplied list of revisions. """ from bzrlib import tsort sorter = tsort.TopoSorter(self.get_parent_map(revisions)) return sorter.iter_topo_order() def is_ancestor(self, candidate_ancestor, candidate_descendant): """Determine whether a revision is an ancestor of another. We answer this using heads() as heads() has the logic to perform the smallest number of parent lookups to determine the ancestral relationship between N revisions. """ return set([candidate_descendant]) == self.heads( [candidate_ancestor, candidate_descendant]) def is_between(self, revid, lower_bound_revid, upper_bound_revid): """Determine whether a revision is between two others. returns true if and only if: lower_bound_revid <= revid <= upper_bound_revid """ return ((upper_bound_revid is None or self.is_ancestor(revid, upper_bound_revid)) and (lower_bound_revid is None or self.is_ancestor(lower_bound_revid, revid))) def _search_for_extra_common(self, common, searchers): """Make sure that unique nodes are genuinely unique. After _find_border_ancestors, all nodes marked "common" are indeed common. Some of the nodes considered unique are not, due to history shortcuts stopping the searches early. We know that we have searched enough when all common search tips are descended from all unique (uncommon) nodes because we know that a node cannot be an ancestor of its own ancestor. :param common: A set of common nodes :param searchers: The searchers returned from _find_border_ancestors :return: None """ # Basic algorithm... # A) The passed in searchers should all be on the same tips, thus # they should be considered the "common" searchers. # B) We find the difference between the searchers, these are the # "unique" nodes for each side. # C) We do a quick culling so that we only start searching from the # more interesting unique nodes. (A unique ancestor is more # interesting than any of its children.) # D) We start searching for ancestors common to all unique nodes. # E) We have the common searchers stop searching any ancestors of # nodes found by (D) # F) When there are no more common search tips, we stop # TODO: We need a way to remove unique_searchers when they overlap with # other unique searchers. 
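        # The net effect is easiest to see through find_difference() (a
        # sketch; 'graph' is a Graph over a DictParentsProvider and the
        # revision ids are illustrative only). With parents {'rev1': (),
        # 'rev2': ('rev1',), 'rev2b': ('rev1',), 'rev3': ('rev2',)}:
        #
        #   graph.find_difference('rev3', 'rev2b')
        #   # => (set(['rev2', 'rev3']), set(['rev2b']))
        #
        # i.e. after this extra searching, only genuinely one-sided revisions
        # remain on each side.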
if len(searchers) != 2: raise NotImplementedError( "Algorithm not yet implemented for > 2 searchers") common_searchers = searchers left_searcher = searchers[0] right_searcher = searchers[1] unique = left_searcher.seen.symmetric_difference(right_searcher.seen) if not unique: # No unique nodes, nothing to do return total_unique = len(unique) unique = self._remove_simple_descendants(unique, self.get_parent_map(unique)) simple_unique = len(unique) unique_searchers = [] for revision_id in unique: if revision_id in left_searcher.seen: parent_searcher = left_searcher else: parent_searcher = right_searcher revs_to_search = parent_searcher.find_seen_ancestors([revision_id]) if not revs_to_search: # XXX: This shouldn't be possible revs_to_search = [revision_id] searcher = self._make_breadth_first_searcher(revs_to_search) # We don't care about the starting nodes. searcher.step() unique_searchers.append(searcher) # possible todo: aggregate the common searchers into a single common # searcher, just make sure that we include the nodes into the .seen # properties of the original searchers ancestor_all_unique = None for searcher in unique_searchers: if ancestor_all_unique is None: ancestor_all_unique = set(searcher.seen) else: ancestor_all_unique = ancestor_all_unique.intersection( searcher.seen) trace.mutter('Started %s unique searchers for %s unique revisions', simple_unique, total_unique) while True: # If we have no more nodes we have nothing to do newly_seen_common = set() for searcher in common_searchers: newly_seen_common.update(searcher.step()) newly_seen_unique = set() for searcher in unique_searchers: newly_seen_unique.update(searcher.step()) new_common_unique = set() for revision in newly_seen_unique: for searcher in unique_searchers: if revision not in searcher.seen: break else: # This is a border because it is a first common that we see # after walking for a while. new_common_unique.add(revision) if newly_seen_common: # These are nodes descended from one of the 'common' searchers. # Make sure all searchers are on the same page for searcher in common_searchers: newly_seen_common.update( searcher.find_seen_ancestors(newly_seen_common)) # We start searching the whole ancestry. It is a bit wasteful, # though. We really just want to mark all of these nodes as # 'seen' and then start just the tips. However, it requires a # get_parent_map() call to figure out the tips anyway, and all # redundant requests should be fairly fast. for searcher in common_searchers: searcher.start_searching(newly_seen_common) # If a 'common' node is an ancestor of all unique searchers, we # can stop searching it. 
stop_searching_common = ancestor_all_unique.intersection( newly_seen_common) if stop_searching_common: for searcher in common_searchers: searcher.stop_searching_any(stop_searching_common) if new_common_unique: # We found some ancestors that are common for searcher in unique_searchers: new_common_unique.update( searcher.find_seen_ancestors(new_common_unique)) # Since these are common, we can grab another set of ancestors # that we have seen for searcher in common_searchers: new_common_unique.update( searcher.find_seen_ancestors(new_common_unique)) # We can tell all of the unique searchers to start at these # nodes, and tell all of the common searchers to *stop* # searching these nodes for searcher in unique_searchers: searcher.start_searching(new_common_unique) for searcher in common_searchers: searcher.stop_searching_any(new_common_unique) ancestor_all_unique.update(new_common_unique) # Filter out searchers that don't actually search different # nodes. We already have the ancestry intersection for them next_unique_searchers = [] unique_search_sets = set() for searcher in unique_searchers: will_search_set = frozenset(searcher._next_query) if will_search_set not in unique_search_sets: # This searcher is searching a unique set of nodes, let it unique_search_sets.add(will_search_set) next_unique_searchers.append(searcher) unique_searchers = next_unique_searchers for searcher in common_searchers: if searcher._next_query: break else: # All common searcher have stopped searching return def _remove_simple_descendants(self, revisions, parent_map): """remove revisions which are children of other ones in the set This doesn't do any graph searching, it just checks the immediate parent_map to find if there are any children which can be removed. :param revisions: A set of revision_ids :return: A set of revision_ids with the children removed """ simple_ancestors = revisions.copy() # TODO: jam 20071214 we *could* restrict it to searching only the # parent_map of revisions already present in 'revisions', but # considering the general use case, I think this is actually # better. # This is the same as the following loop. I don't know that it is any # faster. ## simple_ancestors.difference_update(r for r, p_ids in parent_map.iteritems() ## if p_ids is not None and revisions.intersection(p_ids)) ## return simple_ancestors # Yet Another Way, invert the parent map (which can be cached) ## descendants = {} ## for revision_id, parent_ids in parent_map.iteritems(): ## for p_id in parent_ids: ## descendants.setdefault(p_id, []).append(revision_id) ## for revision in revisions.intersection(descendants): ## simple_ancestors.difference_update(descendants[revision]) ## return simple_ancestors for revision, parent_ids in parent_map.iteritems(): if parent_ids is None: continue for parent_id in parent_ids: if parent_id in revisions: # This node has a parent present in the set, so we can # remove it simple_ancestors.discard(revision) break return simple_ancestors class HeadsCache(object): """A cache of results for graph heads calls.""" def __init__(self, graph): self.graph = graph self._heads = {} def heads(self, keys): """Return the heads of keys. This matches the API of Graph.heads(), specifically the return value is a set which can be mutated, and ordering of the input is not preserved in the output. :see also: Graph.heads. :param keys: The keys to calculate heads for. :return: A set containing the heads, which may be mutated without affecting future lookups. 
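        For example (a minimal sketch, assuming ``graph`` is a Graph)::

            cache = HeadsCache(graph)
            cache.heads(['rev3', 'rev2b'])  # computed via graph.heads()
            cache.heads(['rev2b', 'rev3'])  # served from the cache; the keys
                                            # are frozenset()ed, so order does
                                            # not matter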
""" keys = frozenset(keys) try: return set(self._heads[keys]) except KeyError: heads = self.graph.heads(keys) self._heads[keys] = heads return set(heads) class FrozenHeadsCache(object): """Cache heads() calls, assuming the caller won't modify them.""" def __init__(self, graph): self.graph = graph self._heads = {} def heads(self, keys): """Return the heads of keys. Similar to Graph.heads(). The main difference is that the return value is a frozen set which cannot be mutated. :see also: Graph.heads. :param keys: The keys to calculate heads for. :return: A frozenset containing the heads. """ keys = frozenset(keys) try: return self._heads[keys] except KeyError: heads = frozenset(self.graph.heads(keys)) self._heads[keys] = heads return heads def cache(self, keys, heads): """Store a known value.""" self._heads[frozenset(keys)] = frozenset(heads) class _BreadthFirstSearcher(object): """Parallel search breadth-first the ancestry of revisions. This class implements the iterator protocol, but additionally 1. provides a set of seen ancestors, and 2. allows some ancestries to be unsearched, via stop_searching_any """ def __init__(self, revisions, parents_provider): self._iterations = 0 self._next_query = set(revisions) self.seen = set() self._started_keys = set(self._next_query) self._stopped_keys = set() self._parents_provider = parents_provider self._returning = 'next_with_ghosts' self._current_present = set() self._current_ghosts = set() self._current_parents = {} def __repr__(self): if self._iterations: prefix = "searching" else: prefix = "starting" search = '%s=%r' % (prefix, list(self._next_query)) return ('_BreadthFirstSearcher(iterations=%d, %s,' ' seen=%r)' % (self._iterations, search, list(self.seen))) def get_state(self): """Get the current state of this searcher. :return: Tuple with started keys, excludes and included keys """ if self._returning == 'next': # We have to know the current nodes children to be able to list the # exclude keys for them. However, while we could have a second # look-ahead result buffer and shuffle things around, this method # is typically only called once per search - when memoising the # results of the search. found, ghosts, next, parents = self._do_query(self._next_query) # pretend we didn't query: perhaps we should tweak _do_query to be # entirely stateless? self.seen.difference_update(next) next_query = next.union(ghosts) else: next_query = self._next_query excludes = self._stopped_keys.union(next_query) included_keys = self.seen.difference(excludes) return self._started_keys, excludes, included_keys def _get_result(self): """Get a SearchResult for the current state of this searcher. :return: A SearchResult for this search so far. The SearchResult is static - the search can be advanced and the search result will not be invalidated or altered. """ from bzrlib.vf_search import SearchResult (started_keys, excludes, included_keys) = self.get_state() return SearchResult(started_keys, excludes, len(included_keys), included_keys) def step(self): try: return self.next() except StopIteration: return () def next(self): """Return the next ancestors of this revision. Ancestors are returned in the order they are seen in a breadth-first traversal. No ancestor will be returned more than once. Ancestors are returned before their parentage is queried, so ghosts and missing revisions (including the start revisions) are included in the result. 
This can save a round trip in LCA style calculation by allowing convergence to be detected without reading the data for the revision the convergence occurs on. :return: A set of revision_ids. """ if self._returning != 'next': # switch to returning the query, not the results. self._returning = 'next' self._iterations += 1 else: self._advance() if len(self._next_query) == 0: raise StopIteration() # We have seen what we're querying at this point as we are returning # the query, not the results. self.seen.update(self._next_query) return self._next_query def next_with_ghosts(self): """Return the next found ancestors, with ghosts split out. Ancestors are returned in the order they are seen in a breadth-first traversal. No ancestor will be returned more than once. Ancestors are returned only after asking for their parents, which allows us to detect which revisions are ghosts and which are not. :return: A tuple with (present ancestors, ghost ancestors) sets. """ if self._returning != 'next_with_ghosts': # switch to returning the results, not the current query. self._returning = 'next_with_ghosts' self._advance() if len(self._next_query) == 0: raise StopIteration() self._advance() return self._current_present, self._current_ghosts def _advance(self): """Advance the search. Updates self.seen, self._next_query, self._current_present, self._current_ghosts, self._current_parents and self._iterations. """ self._iterations += 1 found, ghosts, next, parents = self._do_query(self._next_query) self._current_present = found self._current_ghosts = ghosts self._next_query = next self._current_parents = parents # ghosts are implicit stop points, otherwise the search cannot be # repeated when ghosts are filled. self._stopped_keys.update(ghosts) def _do_query(self, revisions): """Query for revisions. Adds revisions to the seen set. :param revisions: Revisions to query. :return: A tuple: (set(found_revisions), set(ghost_revisions), set(parents_of_found_revisions), dict(found_revisions:parents)). """ found_revisions = set() parents_of_found = set() # revisions may contain nodes that point to other nodes in revisions: # we want to filter them out. seen = self.seen seen.update(revisions) parent_map = self._parents_provider.get_parent_map(revisions) found_revisions.update(parent_map) for rev_id, parents in parent_map.iteritems(): if parents is None: continue new_found_parents = [p for p in parents if p not in seen] if new_found_parents: # Calling set.update() with an empty generator is actually # rather expensive. parents_of_found.update(new_found_parents) ghost_revisions = revisions - found_revisions return found_revisions, ghost_revisions, parents_of_found, parent_map def __iter__(self): return self def find_seen_ancestors(self, revisions): """Find ancestors of these revisions that have already been seen. This function generally makes the assumption that querying for the parents of a node that has already been queried is reasonably cheap. (eg, not a round trip to a remote host). """ # TODO: Often we might ask one searcher for its seen ancestors, and # then ask another searcher the same question. This can result in # searching the same revisions repeatedly if the two searchers # have a lot of overlap. all_seen = self.seen pending = set(revisions).intersection(all_seen) seen_ancestors = set(pending) if self._returning == 'next': # self.seen contains what nodes have been returned, not what nodes # have been queried. We don't want to probe for nodes that haven't # been searched yet. 
not_searched_yet = self._next_query else: not_searched_yet = () pending.difference_update(not_searched_yet) get_parent_map = self._parents_provider.get_parent_map while pending: parent_map = get_parent_map(pending) all_parents = [] # We don't care if it is a ghost, since it can't be seen if it is # a ghost for parent_ids in parent_map.itervalues(): all_parents.extend(parent_ids) next_pending = all_seen.intersection(all_parents).difference(seen_ancestors) seen_ancestors.update(next_pending) next_pending.difference_update(not_searched_yet) pending = next_pending return seen_ancestors def stop_searching_any(self, revisions): """ Remove any of the specified revisions from the search list. None of the specified revisions are required to be present in the search list. It is okay to call stop_searching_any() for revisions which were seen in previous iterations. It is the callers responsibility to call find_seen_ancestors() to make sure that current search tips that are ancestors of those revisions are also stopped. All explicitly stopped revisions will be excluded from the search result's get_keys(), though. """ # TODO: does this help performance? # if not revisions: # return set() revisions = frozenset(revisions) if self._returning == 'next': stopped = self._next_query.intersection(revisions) self._next_query = self._next_query.difference(revisions) else: stopped_present = self._current_present.intersection(revisions) stopped = stopped_present.union( self._current_ghosts.intersection(revisions)) self._current_present.difference_update(stopped) self._current_ghosts.difference_update(stopped) # stopping 'x' should stop returning parents of 'x', but # not if 'y' always references those same parents stop_rev_references = {} for rev in stopped_present: for parent_id in self._current_parents[rev]: if parent_id not in stop_rev_references: stop_rev_references[parent_id] = 0 stop_rev_references[parent_id] += 1 # if only the stopped revisions reference it, the ref count will be # 0 after this loop for parents in self._current_parents.itervalues(): for parent_id in parents: try: stop_rev_references[parent_id] -= 1 except KeyError: pass stop_parents = set() for rev_id, refs in stop_rev_references.iteritems(): if refs == 0: stop_parents.add(rev_id) self._next_query.difference_update(stop_parents) self._stopped_keys.update(stopped) self._stopped_keys.update(revisions) return stopped def start_searching(self, revisions): """Add revisions to the search. The parents of revisions will be returned from the next call to next() or next_with_ghosts(). If next_with_ghosts was the most recently used next* call then the return value is the result of looking up the ghost/not ghost status of revisions. (A tuple (present, ghosted)). """ revisions = frozenset(revisions) self._started_keys.update(revisions) new_revisions = revisions.difference(self.seen) if self._returning == 'next': self._next_query.update(new_revisions) self.seen.update(new_revisions) else: # perform a query on revisions revs, ghosts, query, parents = self._do_query(revisions) self._stopped_keys.update(ghosts) self._current_present.update(revs) self._current_ghosts.update(ghosts) self._next_query.update(query) self._current_parents.update(parents) return revs, ghosts def invert_parent_map(parent_map): """Given a map from child => parents, create a map of parent=>children""" child_map = {} for child, parents in parent_map.iteritems(): for p in parents: # Any given parent is likely to have only a small handful # of children, many will have only one. 
So we avoid mem overhead of # a list, in exchange for extra copying of tuples if p not in child_map: child_map[p] = (child,) else: child_map[p] = child_map[p] + (child,) return child_map def collapse_linear_regions(parent_map): """Collapse regions of the graph that are 'linear'. For example:: A:[B], B:[C] can be collapsed by removing B and getting:: A:[C] :param parent_map: A dictionary mapping children to their parents :return: Another dictionary with 'linear' chains collapsed """ # Note: this isn't a strictly minimal collapse. For example: # A # / \ # B C # \ / # D # | # E # Will not have 'D' removed, even though 'E' could fit. Also: # A # | A # B => | # | C # C # A and C are both kept because they are edges of the graph. We *could* get # rid of A if we wanted. # A # / \ # B C # | | # D E # \ / # F # Will not have any nodes removed, even though you do have an # 'uninteresting' linear D->B and E->C children = {} for child, parents in parent_map.iteritems(): children.setdefault(child, []) for p in parents: children.setdefault(p, []).append(child) orig_children = dict(children) removed = set() result = dict(parent_map) for node in parent_map: parents = result[node] if len(parents) == 1: parent_children = children[parents[0]] if len(parent_children) != 1: # This is not the only child continue node_children = children[node] if len(node_children) != 1: continue child_parents = result.get(node_children[0], None) if len(child_parents) != 1: # This is not its only parent continue # The child of this node only points at it, and the parent only has # this as a child. remove this node, and join the others together result[node_children[0]] = parents children[parents[0]] = node_children del result[node] del children[node] removed.add(node) return result class GraphThunkIdsToKeys(object): """Forwards calls about 'ids' to be about keys internally.""" def __init__(self, graph): self._graph = graph def topo_sort(self): return [r for (r,) in self._graph.topo_sort()] def heads(self, ids): """See Graph.heads()""" as_keys = [(i,) for i in ids] head_keys = self._graph.heads(as_keys) return set([h[0] for h in head_keys]) def merge_sort(self, tip_revision): nodes = self._graph.merge_sort((tip_revision,)) for node in nodes: node.key = node.key[0] return nodes def add_node(self, revision, parents): self._graph.add_node((revision,), [(p,) for p in parents]) _counters = [0,0,0,0,0,0,0] try: from bzrlib._known_graph_pyx import KnownGraph except ImportError, e: osutils.failed_to_load_extension(e) from bzrlib._known_graph_py import KnownGraph bzr-2.7.0/bzrlib/groupcompress.py0000644000000000000000000027444211673635356015245 0ustar 00000000000000# Copyright (C) 2008-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Core compression logic for compressing streams of related files.""" from __future__ import absolute_import import time import zlib from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ from bzrlib import ( annotate, config, debug, errors, graph as _mod_graph, osutils, pack, static_tuple, trace, tsort, ) from bzrlib.repofmt import pack_repo from bzrlib.i18n import gettext """) from bzrlib.btree_index import BTreeBuilder from bzrlib.lru_cache import LRUSizeCache from bzrlib.versionedfile import ( _KeyRefs, adapter_registry, AbsentContentFactory, ChunkedContentFactory, FulltextContentFactory, VersionedFilesWithFallbacks, ) # Minimum number of uncompressed bytes to try fetch at once when retrieving # groupcompress blocks. BATCH_SIZE = 2**16 # osutils.sha_string('') _null_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709' def sort_gc_optimal(parent_map): """Sort and group the keys in parent_map into groupcompress order. groupcompress is defined (currently) as reverse-topological order, grouped by the key prefix. :return: A sorted-list of keys """ # groupcompress ordering is approximately reverse topological, # properly grouped by file-id. per_prefix_map = {} for key, value in parent_map.iteritems(): if isinstance(key, str) or len(key) == 1: prefix = '' else: prefix = key[0] try: per_prefix_map[prefix][key] = value except KeyError: per_prefix_map[prefix] = {key: value} present_keys = [] for prefix in sorted(per_prefix_map): present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix]))) return present_keys # The max zlib window size is 32kB, so if we set 'max_size' output of the # decompressor to the requested bytes + 32kB, then we should guarantee # num_bytes coming out. _ZLIB_DECOMP_WINDOW = 32*1024 class GroupCompressBlock(object): """An object which maintains the internal structure of the compressed data. This tracks the meta info (start of text, length, type, etc.) """ # Group Compress Block v1 Zlib GCB_HEADER = 'gcb1z\n' # Group Compress Block v1 Lzma GCB_LZ_HEADER = 'gcb1l\n' GCB_KNOWN_HEADERS = (GCB_HEADER, GCB_LZ_HEADER) def __init__(self): # map by key? or just order in file? self._compressor_name = None self._z_content_chunks = None self._z_content_decompressor = None self._z_content_length = None self._content_length = None self._content = None self._content_chunks = None def __len__(self): # This is the maximum number of bytes this object will reference if # everything is decompressed. However, if we decompress less than # everything... (this would cause some problems for LRUSizeCache) return self._content_length + self._z_content_length def _ensure_content(self, num_bytes=None): """Make sure that content has been expanded enough. :param num_bytes: Ensure that we have extracted at least num_bytes of content. 
If None, consume everything """ if self._content_length is None: raise AssertionError('self._content_length should never be None') if num_bytes is None: num_bytes = self._content_length elif (self._content_length is not None and num_bytes > self._content_length): raise AssertionError( 'requested num_bytes (%d) > content length (%d)' % (num_bytes, self._content_length)) # Expand the content if required if self._content is None: if self._content_chunks is not None: self._content = ''.join(self._content_chunks) self._content_chunks = None if self._content is None: # We join self._z_content_chunks here, because if we are # decompressing, then it is *very* likely that we have a single # chunk if self._z_content_chunks is None: raise AssertionError('No content to decompress') z_content = ''.join(self._z_content_chunks) if z_content == '': self._content = '' elif self._compressor_name == 'lzma': # We don't do partial lzma decomp yet import pylzma self._content = pylzma.decompress(z_content) elif self._compressor_name == 'zlib': # Start a zlib decompressor if num_bytes * 4 > self._content_length * 3: # If we are requesting more that 3/4ths of the content, # just extract the whole thing in a single pass num_bytes = self._content_length self._content = zlib.decompress(z_content) else: self._z_content_decompressor = zlib.decompressobj() # Seed the decompressor with the uncompressed bytes, so # that the rest of the code is simplified self._content = self._z_content_decompressor.decompress( z_content, num_bytes + _ZLIB_DECOMP_WINDOW) if not self._z_content_decompressor.unconsumed_tail: self._z_content_decompressor = None else: raise AssertionError('Unknown compressor: %r' % self._compressor_name) # Any bytes remaining to be decompressed will be in the decompressors # 'unconsumed_tail' # Do we have enough bytes already? if len(self._content) >= num_bytes: return # If we got this far, and don't have a decompressor, something is wrong if self._z_content_decompressor is None: raise AssertionError( 'No decompressor to decompress %d bytes' % num_bytes) remaining_decomp = self._z_content_decompressor.unconsumed_tail if not remaining_decomp: raise AssertionError('Nothing left to decompress') needed_bytes = num_bytes - len(self._content) # We always set max_size to 32kB over the minimum needed, so that # zlib will give us as much as we really want. # TODO: If this isn't good enough, we could make a loop here, # that keeps expanding the request until we get enough self._content += self._z_content_decompressor.decompress( remaining_decomp, needed_bytes + _ZLIB_DECOMP_WINDOW) if len(self._content) < num_bytes: raise AssertionError('%d bytes wanted, only %d available' % (num_bytes, len(self._content))) if not self._z_content_decompressor.unconsumed_tail: # The stream is finished self._z_content_decompressor = None def _parse_bytes(self, bytes, pos): """Read the various lengths from the header. This also populates the various 'compressed' buffers. :return: The position in bytes just after the last newline """ # At present, we have 2 integers for the compressed and uncompressed # content. In base10 (ascii) 14 bytes can represent > 1TB, so to avoid # checking too far, cap the search to 14 bytes. pos2 = bytes.index('\n', pos, pos + 14) self._z_content_length = int(bytes[pos:pos2]) pos = pos2 + 1 pos2 = bytes.index('\n', pos, pos + 14) self._content_length = int(bytes[pos:pos2]) pos = pos2 + 1 if len(bytes) != (pos + self._z_content_length): # XXX: Define some GCCorrupt error ? 
raise AssertionError('Invalid bytes: (%d) != %d + %d' % (len(bytes), pos, self._z_content_length)) self._z_content_chunks = (bytes[pos:],) @property def _z_content(self): """Return z_content_chunks as a simple string. Meant only to be used by the test suite. """ if self._z_content_chunks is not None: return ''.join(self._z_content_chunks) return None @classmethod def from_bytes(cls, bytes): out = cls() if bytes[:6] not in cls.GCB_KNOWN_HEADERS: raise ValueError('bytes did not start with any of %r' % (cls.GCB_KNOWN_HEADERS,)) # XXX: why not testing the whole header ? if bytes[4] == 'z': out._compressor_name = 'zlib' elif bytes[4] == 'l': out._compressor_name = 'lzma' else: raise ValueError('unknown compressor: %r' % (bytes,)) out._parse_bytes(bytes, 6) return out def extract(self, key, start, end, sha1=None): """Extract the text for a specific key. :param key: The label used for this content :param sha1: TODO (should we validate only when sha1 is supplied?) :return: The bytes for the content """ if start == end == 0: return '' self._ensure_content(end) # The bytes are 'f' or 'd' for the type, then a variable-length # base128 integer for the content size, then the actual content # We know that the variable-length integer won't be longer than 5 # bytes (it takes 5 bytes to encode 2^32) c = self._content[start] if c == 'f': type = 'fulltext' else: if c != 'd': raise ValueError('Unknown content control code: %s' % (c,)) type = 'delta' content_len, len_len = decode_base128_int( self._content[start + 1:start + 6]) content_start = start + 1 + len_len if end != content_start + content_len: raise ValueError('end != len according to field header' ' %s != %s' % (end, content_start + content_len)) if c == 'f': bytes = self._content[content_start:end] elif c == 'd': bytes = apply_delta_to_source(self._content, content_start, end) return bytes def set_chunked_content(self, content_chunks, length): """Set the content of this block to the given chunks.""" # If we have lots of short lines, it is may be more efficient to join # the content ahead of time. If the content is <10MiB, we don't really # care about the extra memory consumption, so we can just pack it and # be done. However, timing showed 18s => 17.9s for repacking 1k revs of # mysql, which is below the noise margin self._content_length = length self._content_chunks = content_chunks self._content = None self._z_content_chunks = None def set_content(self, content): """Set the content of this block.""" self._content_length = len(content) self._content = content self._z_content_chunks = None def _create_z_content_from_chunks(self, chunks): compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION) # Peak in this point is 1 fulltext, 1 compressed text, + zlib overhead # (measured peak is maybe 30MB over the above...) 
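        # (The resulting self._z_content_chunks are later framed by
        # to_chunks()/to_bytes() roughly as follows -- a sketch of the zlib
        # variant only:
        #
        #   'gcb1z\n'                  GCB_HEADER
        #   '<z_content_length>\n'     ascii length of the compressed content
        #   '<content_length>\n'       ascii length of the uncompressed content
        #   <z_content bytes>          the compressed chunks produced below
        #
        # which is what _parse_bytes() above reads back.)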
compressed_chunks = map(compressor.compress, chunks) compressed_chunks.append(compressor.flush()) # Ignore empty chunks self._z_content_chunks = [c for c in compressed_chunks if c] self._z_content_length = sum(map(len, self._z_content_chunks)) def _create_z_content(self): if self._z_content_chunks is not None: return if self._content_chunks is not None: chunks = self._content_chunks else: chunks = (self._content,) self._create_z_content_from_chunks(chunks) def to_chunks(self): """Create the byte stream as a series of 'chunks'""" self._create_z_content() header = self.GCB_HEADER chunks = ['%s%d\n%d\n' % (header, self._z_content_length, self._content_length), ] chunks.extend(self._z_content_chunks) total_len = sum(map(len, chunks)) return total_len, chunks def to_bytes(self): """Encode the information into a byte stream.""" total_len, chunks = self.to_chunks() return ''.join(chunks) def _dump(self, include_text=False): """Take this block, and spit out a human-readable structure. :param include_text: Inserts also include text bits, chose whether you want this displayed in the dump or not. :return: A dump of the given block. The layout is something like: [('f', length), ('d', delta_length, text_length, [delta_info])] delta_info := [('i', num_bytes, text), ('c', offset, num_bytes), ...] """ self._ensure_content() result = [] pos = 0 while pos < self._content_length: kind = self._content[pos] pos += 1 if kind not in ('f', 'd'): raise ValueError('invalid kind character: %r' % (kind,)) content_len, len_len = decode_base128_int( self._content[pos:pos + 5]) pos += len_len if content_len + pos > self._content_length: raise ValueError('invalid content_len %d for record @ pos %d' % (content_len, pos - len_len - 1)) if kind == 'f': # Fulltext if include_text: text = self._content[pos:pos+content_len] result.append(('f', content_len, text)) else: result.append(('f', content_len)) elif kind == 'd': # Delta delta_content = self._content[pos:pos+content_len] delta_info = [] # The first entry in a delta is the decompressed length decomp_len, delta_pos = decode_base128_int(delta_content) result.append(('d', content_len, decomp_len, delta_info)) measured_len = 0 while delta_pos < content_len: c = ord(delta_content[delta_pos]) delta_pos += 1 if c & 0x80: # Copy (offset, length, delta_pos) = decode_copy_instruction(delta_content, c, delta_pos) if include_text: text = self._content[offset:offset+length] delta_info.append(('c', offset, length, text)) else: delta_info.append(('c', offset, length)) measured_len += length else: # Insert if include_text: txt = delta_content[delta_pos:delta_pos+c] else: txt = '' delta_info.append(('i', c, txt)) measured_len += c delta_pos += c if delta_pos != content_len: raise ValueError('Delta consumed a bad number of bytes:' ' %d != %d' % (delta_pos, content_len)) if measured_len != decomp_len: raise ValueError('Delta claimed fulltext was %d bytes, but' ' extraction resulted in %d bytes' % (decomp_len, measured_len)) pos += content_len return result class _LazyGroupCompressFactory(object): """Yield content from a GroupCompressBlock on demand.""" def __init__(self, key, parents, manager, start, end, first): """Create a _LazyGroupCompressFactory :param key: The key of just this record :param parents: The parents of this key (possibly None) :param gc_block: A GroupCompressBlock object :param start: Offset of the first byte for this record in the uncompressd content :param end: Offset of the byte just after the end of this record (ie, bytes = content[start:end]) :param first: Is this 
the first Factory for the given block? """ self.key = key self.parents = parents self.sha1 = None # Note: This attribute coupled with Manager._factories creates a # reference cycle. Perhaps we would rather use a weakref(), or # find an appropriate time to release the ref. After the first # get_bytes_as call? After Manager.get_record_stream() returns # the object? self._manager = manager self._bytes = None self.storage_kind = 'groupcompress-block' if not first: self.storage_kind = 'groupcompress-block-ref' self._first = first self._start = start self._end = end def __repr__(self): return '%s(%s, first=%s)' % (self.__class__.__name__, self.key, self._first) def get_bytes_as(self, storage_kind): if storage_kind == self.storage_kind: if self._first: # wire bytes, something... return self._manager._wire_bytes() else: return '' if storage_kind in ('fulltext', 'chunked'): if self._bytes is None: # Grab and cache the raw bytes for this entry # and break the ref-cycle with _manager since we don't need it # anymore try: self._manager._prepare_for_extract() except zlib.error as value: raise errors.DecompressCorruption("zlib: " + str(value)) block = self._manager._block self._bytes = block.extract(self.key, self._start, self._end) # There are code paths that first extract as fulltext, and then # extract as storage_kind (smart fetch). So we don't break the # refcycle here, but instead in manager.get_record_stream() if storage_kind == 'fulltext': return self._bytes else: return [self._bytes] raise errors.UnavailableRepresentation(self.key, storage_kind, self.storage_kind) class _LazyGroupContentManager(object): """This manages a group of _LazyGroupCompressFactory objects.""" _max_cut_fraction = 0.75 # We allow a block to be trimmed to 75% of # current size, and still be considered # resuable _full_block_size = 4*1024*1024 _full_mixed_block_size = 2*1024*1024 _full_enough_block_size = 3*1024*1024 # size at which we won't repack _full_enough_mixed_block_size = 2*768*1024 # 1.5MB def __init__(self, block, get_compressor_settings=None): self._block = block # We need to preserve the ordering self._factories = [] self._last_byte = 0 self._get_settings = get_compressor_settings self._compressor_settings = None def _get_compressor_settings(self): if self._compressor_settings is not None: return self._compressor_settings settings = None if self._get_settings is not None: settings = self._get_settings() if settings is None: vf = GroupCompressVersionedFiles settings = vf._DEFAULT_COMPRESSOR_SETTINGS self._compressor_settings = settings return self._compressor_settings def add_factory(self, key, parents, start, end): if not self._factories: first = True else: first = False # Note that this creates a reference cycle.... 
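        # Illustrative sketch, not part of the original source: a consumer of
        # this manager typically registers each record it wants from the block
        # and then walks the factories, e.g. (the offsets here are made up):
        #
        #   manager = _LazyGroupContentManager(block)
        #   manager.add_factory(('file-id', 'rev-a'), None, 0, 42)
        #   for factory in manager.get_record_stream():
        #       text = factory.get_bytes_as('fulltext')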
factory = _LazyGroupCompressFactory(key, parents, self, start, end, first=first) # max() works here, but as a function call, doing a compare seems to be # significantly faster, timeit says 250ms for max() and 100ms for the # comparison if end > self._last_byte: self._last_byte = end self._factories.append(factory) def get_record_stream(self): """Get a record for all keys added so far.""" for factory in self._factories: yield factory # Break the ref-cycle factory._bytes = None factory._manager = None # TODO: Consider setting self._factories = None after the above loop, # as it will break the reference cycle def _trim_block(self, last_byte): """Create a new GroupCompressBlock, with just some of the content.""" # None of the factories need to be adjusted, because the content is # located in an identical place. Just that some of the unreferenced # trailing bytes are stripped trace.mutter('stripping trailing bytes from groupcompress block' ' %d => %d', self._block._content_length, last_byte) new_block = GroupCompressBlock() self._block._ensure_content(last_byte) new_block.set_content(self._block._content[:last_byte]) self._block = new_block def _make_group_compressor(self): return GroupCompressor(self._get_compressor_settings()) def _rebuild_block(self): """Create a new GroupCompressBlock with only the referenced texts.""" compressor = self._make_group_compressor() tstart = time.time() old_length = self._block._content_length end_point = 0 for factory in self._factories: bytes = factory.get_bytes_as('fulltext') (found_sha1, start_point, end_point, type) = compressor.compress(factory.key, bytes, factory.sha1) # Now update this factory with the new offsets, etc factory.sha1 = found_sha1 factory._start = start_point factory._end = end_point self._last_byte = end_point new_block = compressor.flush() # TODO: Should we check that new_block really *is* smaller than the old # block? It seems hard to come up with a method that it would # expand, since we do full compression again. Perhaps based on a # request that ends up poorly ordered? # TODO: If the content would have expanded, then we would want to # handle a case where we need to split the block. # Now that we have a user-tweakable option # (max_bytes_to_index), it is possible that one person set it # to a very low value, causing poor compression. delta = time.time() - tstart self._block = new_block trace.mutter('creating new compressed block on-the-fly in %.3fs' ' %d bytes => %d bytes', delta, old_length, self._block._content_length) def _prepare_for_extract(self): """A _LazyGroupCompressFactory is about to extract to fulltext.""" # We expect that if one child is going to fulltext, all will be. This # helps prevent all of them from extracting a small amount at a time. # Which in itself isn't terribly expensive, but resizing 2MB 32kB at a # time (self._block._content) is a little expensive. self._block._ensure_content(self._last_byte) def _check_rebuild_action(self): """Check to see if our block should be repacked.""" total_bytes_used = 0 last_byte_used = 0 for factory in self._factories: total_bytes_used += factory._end - factory._start if last_byte_used < factory._end: last_byte_used = factory._end # If we are using more than half of the bytes from the block, we have # nothing else to check if total_bytes_used * 2 >= self._block._content_length: return None, last_byte_used, total_bytes_used # We are using less than 50% of the content. Is the content we are # using at the beginning of the block? 
If so, we can just trim the # tail, rather than rebuilding from scratch. if total_bytes_used * 2 > last_byte_used: return 'trim', last_byte_used, total_bytes_used # We are using a small amount of the data, and it isn't just packed # nicely at the front, so rebuild the content. # Note: This would be *nicer* as a strip-data-from-group, rather than # building it up again from scratch # It might be reasonable to consider the fulltext sizes for # different bits when deciding this, too. As you may have a small # fulltext, and a trivial delta, and you are just trading around # for another fulltext. If we do a simple 'prune' you may end up # expanding many deltas into fulltexts, as well. # If we build a cheap enough 'strip', then we could try a strip, # if that expands the content, we then rebuild. return 'rebuild', last_byte_used, total_bytes_used def check_is_well_utilized(self): """Is the current block considered 'well utilized'? This heuristic asks if the current block considers itself to be a fully developed group, rather than just a loose collection of data. """ if len(self._factories) == 1: # A block of length 1 could be improved by combining with other # groups - don't look deeper. Even larger than max size groups # could compress well with adjacent versions of the same thing. return False action, last_byte_used, total_bytes_used = self._check_rebuild_action() block_size = self._block._content_length if total_bytes_used < block_size * self._max_cut_fraction: # This block wants to trim itself small enough that we want to # consider it under-utilized. return False # TODO: This code is meant to be the twin of _insert_record_stream's # 'start_new_block' logic. It would probably be better to factor # out that logic into a shared location, so that it stays # together better # We currently assume a block is properly utilized whenever it is >75% # of the size of a 'full' block. In normal operation, a block is # considered full when it hits 4MB of same-file content. So any block # >3MB is 'full enough'. # The only time this isn't true is when a given block has large-object # content. (a single file >4MB, etc.) # Under these circumstances, we allow a block to grow to # 2 x largest_content. Which means that if a given block had a large # object, it may actually be under-utilized. However, given that this # is 'pack-on-the-fly' it is probably reasonable to not repack large # content blobs on-the-fly. Note that because we return False for all # 1-item blobs, we will repack them; we may wish to reevaluate our # treatment of large object blobs in the future. if block_size >= self._full_enough_block_size: return True # If a block is <3MB, it still may be considered 'full' if it contains # mixed content. The current rule is 2MB of mixed content is considered # full. So check to see if this block contains mixed content, and # set the threshold appropriately. common_prefix = None for factory in self._factories: prefix = factory.key[:-1] if common_prefix is None: common_prefix = prefix elif prefix != common_prefix: # Mixed content, check the size appropriately if block_size >= self._full_enough_mixed_block_size: return True break # The content failed both the mixed check and the single-content check # so obviously it is not fully utilized # TODO: there is one other constraint that isn't being checked # namely, that the entries in the block are in the appropriate # order. For example, you could insert the entries in exactly # reverse groupcompress order, and we would think that is ok. 
        #       (all the right objects are in one group, and it is fully
        #       utilized, etc.) For now, we assume that case is rare,
        #       especially since we should always fetch in 'groupcompress'
        #       order.
        return False

    def _check_rebuild_block(self):
        action, last_byte_used, total_bytes_used = self._check_rebuild_action()
        if action is None:
            return
        if action == 'trim':
            self._trim_block(last_byte_used)
        elif action == 'rebuild':
            self._rebuild_block()
        else:
            raise ValueError('unknown rebuild action: %r' % (action,))

    def _wire_bytes(self):
        """Return a byte stream suitable for transmitting over the wire."""
        self._check_rebuild_block()
        # The outer block starts with:
        #   'groupcompress-block\n'
        #   <length of compressed key info>\n
        #   <length of uncompressed info>\n
        #   <length of gc block>\n
        #   <header bytes>
# lines = ['groupcompress-block\n'] # The minimal info we need is the key, the start offset, and the # parents. The length and type are encoded in the record itself. # However, passing in the other bits makes it easier. The list of # keys, and the start offset, the length # 1 line key # 1 line with parents, '' for () # 1 line for start offset # 1 line for end byte header_lines = [] for factory in self._factories: key_bytes = '\x00'.join(factory.key) parents = factory.parents if parents is None: parent_bytes = 'None:' else: parent_bytes = '\t'.join('\x00'.join(key) for key in parents) record_header = '%s\n%s\n%d\n%d\n' % ( key_bytes, parent_bytes, factory._start, factory._end) header_lines.append(record_header) # TODO: Can we break the refcycle at this point and set # factory._manager = None? header_bytes = ''.join(header_lines) del header_lines header_bytes_len = len(header_bytes) z_header_bytes = zlib.compress(header_bytes) del header_bytes z_header_bytes_len = len(z_header_bytes) block_bytes_len, block_chunks = self._block.to_chunks() lines.append('%d\n%d\n%d\n' % (z_header_bytes_len, header_bytes_len, block_bytes_len)) lines.append(z_header_bytes) lines.extend(block_chunks) del z_header_bytes, block_chunks # TODO: This is a point where we will double the memory consumption. To # avoid this, we probably have to switch to a 'chunked' api return ''.join(lines) @classmethod def from_bytes(cls, bytes): # TODO: This does extra string copying, probably better to do it a # different way. At a minimum this creates 2 copies of the # compressed content (storage_kind, z_header_len, header_len, block_len, rest) = bytes.split('\n', 4) del bytes if storage_kind != 'groupcompress-block': raise ValueError('Unknown storage kind: %s' % (storage_kind,)) z_header_len = int(z_header_len) if len(rest) < z_header_len: raise ValueError('Compressed header len shorter than all bytes') z_header = rest[:z_header_len] header_len = int(header_len) header = zlib.decompress(z_header) if len(header) != header_len: raise ValueError('invalid length for decompressed bytes') del z_header block_len = int(block_len) if len(rest) != z_header_len + block_len: raise ValueError('Invalid length for block') block_bytes = rest[z_header_len:] del rest # So now we have a valid GCB, we just need to parse the factories that # were sent to us header_lines = header.split('\n') del header last = header_lines.pop() if last != '': raise ValueError('header lines did not end with a trailing' ' newline') if len(header_lines) % 4 != 0: raise ValueError('The header was not an even multiple of 4 lines') block = GroupCompressBlock.from_bytes(block_bytes) del block_bytes result = cls(block) for start in xrange(0, len(header_lines), 4): # intern()? 
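            # Illustrative sketch, not part of the original source: the wire
            # form parsed here is what _wire_bytes() above emits, roughly:
            #
            #   groupcompress-block\n
            #   <length of compressed header>\n
            #   <length of uncompressed header>\n
            #   <length of the gc block>\n
            #   <zlib compressed header><gc block bytes>
            #
            # and each record contributes four header lines, for example
            # (offsets made up):
            #
            #   file-id\x00rev-b\n      key elements joined by NUL
            #   file-id\x00rev-a\n      parents, or 'None:' when parents is None
            #   0\n                     start offset in the expanded block
            #   42\n                    end offset in the expanded block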
key = tuple(header_lines[start].split('\x00')) parents_line = header_lines[start+1] if parents_line == 'None:': parents = None else: parents = tuple([tuple(segment.split('\x00')) for segment in parents_line.split('\t') if segment]) start_offset = int(header_lines[start+2]) end_offset = int(header_lines[start+3]) result.add_factory(key, parents, start_offset, end_offset) return result def network_block_to_records(storage_kind, bytes, line_end): if storage_kind != 'groupcompress-block': raise ValueError('Unknown storage kind: %s' % (storage_kind,)) manager = _LazyGroupContentManager.from_bytes(bytes) return manager.get_record_stream() class _CommonGroupCompressor(object): def __init__(self, settings=None): """Create a GroupCompressor.""" self.chunks = [] self._last = None self.endpoint = 0 self.input_bytes = 0 self.labels_deltas = {} self._delta_index = None # Set by the children self._block = GroupCompressBlock() if settings is None: self._settings = {} else: self._settings = settings def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False): """Compress lines with label key. :param key: A key tuple. It is stored in the output for identification of the text during decompression. If the last element is 'None' it is replaced with the sha1 of the text - e.g. sha1:xxxxxxx. :param bytes: The bytes to be compressed :param expected_sha: If non-None, the sha the lines are believed to have. During compression the sha is calculated; a mismatch will cause an error. :param nostore_sha: If the computed sha1 sum matches, we will raise ExistingContent rather than adding the text. :param soft: Do a 'soft' compression. This means that we require larger ranges to match to be considered for a copy command. :return: The sha1 of lines, the start and end offsets in the delta, and the type ('fulltext' or 'delta'). :seealso VersionedFiles.add_lines: """ if not bytes: # empty, like a dir entry, etc if nostore_sha == _null_sha1: raise errors.ExistingContent() return _null_sha1, 0, 0, 'fulltext' # we assume someone knew what they were doing when they passed it in if expected_sha is not None: sha1 = expected_sha else: sha1 = osutils.sha_string(bytes) if nostore_sha is not None: if sha1 == nostore_sha: raise errors.ExistingContent() if key[-1] is None: key = key[:-1] + ('sha1:' + sha1,) start, end, type = self._compress(key, bytes, len(bytes) / 2, soft) return sha1, start, end, type def _compress(self, key, bytes, max_delta_size, soft=False): """Compress lines with label key. :param key: A key tuple. It is stored in the output for identification of the text during decompression. :param bytes: The bytes to be compressed :param max_delta_size: The size above which we issue a fulltext instead of a delta. :param soft: Do a 'soft' compression. This means that we require larger ranges to match to be considered for a copy command. :return: The sha1 of lines, the start and end offsets in the delta, and the type ('fulltext' or 'delta'). """ raise NotImplementedError(self._compress) def extract(self, key): """Extract a key previously added to the compressor. :param key: The key to extract. :return: An iterable over bytes and the sha1. 
""" (start_byte, start_chunk, end_byte, end_chunk) = self.labels_deltas[key] delta_chunks = self.chunks[start_chunk:end_chunk] stored_bytes = ''.join(delta_chunks) if stored_bytes[0] == 'f': fulltext_len, offset = decode_base128_int(stored_bytes[1:10]) data_len = fulltext_len + 1 + offset if data_len != len(stored_bytes): raise ValueError('Index claimed fulltext len, but stored bytes' ' claim %s != %s' % (len(stored_bytes), data_len)) bytes = stored_bytes[offset + 1:] else: # XXX: This is inefficient at best source = ''.join(self.chunks[:start_chunk]) if stored_bytes[0] != 'd': raise ValueError('Unknown content kind, bytes claim %s' % (stored_bytes[0],)) delta_len, offset = decode_base128_int(stored_bytes[1:10]) data_len = delta_len + 1 + offset if data_len != len(stored_bytes): raise ValueError('Index claimed delta len, but stored bytes' ' claim %s != %s' % (len(stored_bytes), data_len)) bytes = apply_delta(source, stored_bytes[offset + 1:]) bytes_sha1 = osutils.sha_string(bytes) return bytes, bytes_sha1 def flush(self): """Finish this group, creating a formatted stream. After calling this, the compressor should no longer be used """ self._block.set_chunked_content(self.chunks, self.endpoint) self.chunks = None self._delta_index = None return self._block def pop_last(self): """Call this if you want to 'revoke' the last compression. After this, the data structures will be rolled back, but you cannot do more compression. """ self._delta_index = None del self.chunks[self._last[0]:] self.endpoint = self._last[1] self._last = None def ratio(self): """Return the overall compression ratio.""" return float(self.input_bytes) / float(self.endpoint) class PythonGroupCompressor(_CommonGroupCompressor): def __init__(self, settings=None): """Create a GroupCompressor. Used only if the pyrex version is not available. """ super(PythonGroupCompressor, self).__init__(settings) self._delta_index = LinesDeltaIndex([]) # The actual content is managed by LinesDeltaIndex self.chunks = self._delta_index.lines def _compress(self, key, bytes, max_delta_size, soft=False): """see _CommonGroupCompressor._compress""" input_len = len(bytes) new_lines = osutils.split_lines(bytes) out_lines, index_lines = self._delta_index.make_delta( new_lines, bytes_length=input_len, soft=soft) delta_length = sum(map(len, out_lines)) if delta_length > max_delta_size: # The delta is longer than the fulltext, insert a fulltext type = 'fulltext' out_lines = ['f', encode_base128_int(input_len)] out_lines.extend(new_lines) index_lines = [False, False] index_lines.extend([True] * len(new_lines)) else: # this is a worthy delta, output it type = 'delta' out_lines[0] = 'd' # Update the delta_length to include those two encoded integers out_lines[1] = encode_base128_int(delta_length) # Before insertion start = self.endpoint chunk_start = len(self.chunks) self._last = (chunk_start, self.endpoint) self._delta_index.extend_lines(out_lines, index_lines) self.endpoint = self._delta_index.endpoint self.input_bytes += input_len chunk_end = len(self.chunks) self.labels_deltas[key] = (start, chunk_start, self.endpoint, chunk_end) return start, self.endpoint, type class PyrexGroupCompressor(_CommonGroupCompressor): """Produce a serialised group of compressed texts. It contains code very similar to SequenceMatcher because of having a similar task. However some key differences apply: * there is no junk, we want a minimal edit not a human readable diff. 
* we don't filter very common lines (because we don't know where a good range will start, and after the first text we want to be emitting minmal edits only. * we chain the left side, not the right side * we incrementally update the adjacency matrix as new lines are provided. * we look for matches in all of the left side, so the routine which does the analagous task of find_longest_match does not need to filter on the left side. """ def __init__(self, settings=None): super(PyrexGroupCompressor, self).__init__(settings) max_bytes_to_index = self._settings.get('max_bytes_to_index', 0) self._delta_index = DeltaIndex(max_bytes_to_index=max_bytes_to_index) def _compress(self, key, bytes, max_delta_size, soft=False): """see _CommonGroupCompressor._compress""" input_len = len(bytes) # By having action/label/sha1/len, we can parse the group if the index # was ever destroyed, we have the key in 'label', we know the final # bytes are valid from sha1, and we know where to find the end of this # record because of 'len'. (the delta record itself will store the # total length for the expanded record) # 'len: %d\n' costs approximately 1% increase in total data # Having the labels at all costs us 9-10% increase, 38% increase for # inventory pages, and 5.8% increase for text pages # new_chunks = ['label:%s\nsha1:%s\n' % (label, sha1)] if self._delta_index._source_offset != self.endpoint: raise AssertionError('_source_offset != endpoint' ' somehow the DeltaIndex got out of sync with' ' the output lines') delta = self._delta_index.make_delta(bytes, max_delta_size) if (delta is None): type = 'fulltext' enc_length = encode_base128_int(len(bytes)) len_mini_header = 1 + len(enc_length) self._delta_index.add_source(bytes, len_mini_header) new_chunks = ['f', enc_length, bytes] else: type = 'delta' enc_length = encode_base128_int(len(delta)) len_mini_header = 1 + len(enc_length) new_chunks = ['d', enc_length, delta] self._delta_index.add_delta_source(delta, len_mini_header) # Before insertion start = self.endpoint chunk_start = len(self.chunks) # Now output these bytes self._output_chunks(new_chunks) self.input_bytes += input_len chunk_end = len(self.chunks) self.labels_deltas[key] = (start, chunk_start, self.endpoint, chunk_end) if not self._delta_index._source_offset == self.endpoint: raise AssertionError('the delta index is out of sync' 'with the output lines %s != %s' % (self._delta_index._source_offset, self.endpoint)) return start, self.endpoint, type def _output_chunks(self, new_chunks): """Output some chunks. :param new_chunks: The chunks to output. """ self._last = (len(self.chunks), self.endpoint) endpoint = self.endpoint self.chunks.extend(new_chunks) endpoint += sum(map(len, new_chunks)) self.endpoint = endpoint def make_pack_factory(graph, delta, keylength, inconsistency_fatal=True): """Create a factory for creating a pack based groupcompress. This is only functional enough to run interface tests, it doesn't try to provide a full pack environment. :param graph: Store a graph. :param delta: Delta compress contents. :param keylength: How long should keys be. 
""" def factory(transport): parents = graph ref_length = 0 if graph: ref_length = 1 graph_index = BTreeBuilder(reference_lists=ref_length, key_elements=keylength) stream = transport.open_write_stream('newpack') writer = pack.ContainerWriter(stream.write) writer.begin() index = _GCGraphIndex(graph_index, lambda:True, parents=parents, add_callback=graph_index.add_nodes, inconsistency_fatal=inconsistency_fatal) access = pack_repo._DirectPackAccess({}) access.set_writer(writer, graph_index, (transport, 'newpack')) result = GroupCompressVersionedFiles(index, access, delta) result.stream = stream result.writer = writer return result return factory def cleanup_pack_group(versioned_files): versioned_files.writer.end() versioned_files.stream.close() class _BatchingBlockFetcher(object): """Fetch group compress blocks in batches. :ivar total_bytes: int of expected number of bytes needed to fetch the currently pending batch. """ def __init__(self, gcvf, locations, get_compressor_settings=None): self.gcvf = gcvf self.locations = locations self.keys = [] self.batch_memos = {} self.memos_to_get = [] self.total_bytes = 0 self.last_read_memo = None self.manager = None self._get_compressor_settings = get_compressor_settings def add_key(self, key): """Add another to key to fetch. :return: The estimated number of bytes needed to fetch the batch so far. """ self.keys.append(key) index_memo, _, _, _ = self.locations[key] read_memo = index_memo[0:3] # Three possibilities for this read_memo: # - it's already part of this batch; or # - it's not yet part of this batch, but is already cached; or # - it's not yet part of this batch and will need to be fetched. if read_memo in self.batch_memos: # This read memo is already in this batch. return self.total_bytes try: cached_block = self.gcvf._group_cache[read_memo] except KeyError: # This read memo is new to this batch, and the data isn't cached # either. self.batch_memos[read_memo] = None self.memos_to_get.append(read_memo) byte_length = read_memo[2] self.total_bytes += byte_length else: # This read memo is new to this batch, but cached. # Keep a reference to the cached block in batch_memos because it's # certain that we'll use it when this batch is processed, but # there's a risk that it would fall out of _group_cache between now # and then. self.batch_memos[read_memo] = cached_block return self.total_bytes def _flush_manager(self): if self.manager is not None: for factory in self.manager.get_record_stream(): yield factory self.manager = None self.last_read_memo = None def yield_factories(self, full_flush=False): """Yield factories for keys added since the last yield. They will be returned in the order they were added via add_key. :param full_flush: by default, some results may not be returned in case they can be part of the next batch. If full_flush is True, then all results are returned. """ if self.manager is None and not self.keys: return # Fetch all memos in this batch. blocks = self.gcvf._get_blocks(self.memos_to_get) # Turn blocks into factories and yield them. memos_to_get_stack = list(self.memos_to_get) memos_to_get_stack.reverse() for key in self.keys: index_memo, _, parents, _ = self.locations[key] read_memo = index_memo[:3] if self.last_read_memo != read_memo: # We are starting a new block. If we have a # manager, we have found everything that fits for # now, so yield records for factory in self._flush_manager(): yield factory # Now start a new manager. 
if memos_to_get_stack and memos_to_get_stack[-1] == read_memo: # The next block from _get_blocks will be the block we # need. block_read_memo, block = blocks.next() if block_read_memo != read_memo: raise AssertionError( "block_read_memo out of sync with read_memo" "(%r != %r)" % (block_read_memo, read_memo)) self.batch_memos[read_memo] = block memos_to_get_stack.pop() else: block = self.batch_memos[read_memo] self.manager = _LazyGroupContentManager(block, get_compressor_settings=self._get_compressor_settings) self.last_read_memo = read_memo start, end = index_memo[3:5] self.manager.add_factory(key, parents, start, end) if full_flush: for factory in self._flush_manager(): yield factory del self.keys[:] self.batch_memos.clear() del self.memos_to_get[:] self.total_bytes = 0 class GroupCompressVersionedFiles(VersionedFilesWithFallbacks): """A group-compress based VersionedFiles implementation.""" # This controls how the GroupCompress DeltaIndex works. Basically, we # compute hash pointers into the source blocks (so hash(text) => text). # However each of these references costs some memory in trade against a # more accurate match result. For very large files, they either are # pre-compressed and change in bulk whenever they change, or change in just # local blocks. Either way, 'improved resolution' is not very helpful, # versus running out of memory trying to track everything. The default max # gives 100% sampling of a 1MB file. _DEFAULT_MAX_BYTES_TO_INDEX = 1024 * 1024 _DEFAULT_COMPRESSOR_SETTINGS = {'max_bytes_to_index': _DEFAULT_MAX_BYTES_TO_INDEX} def __init__(self, index, access, delta=True, _unadded_refs=None, _group_cache=None): """Create a GroupCompressVersionedFiles object. :param index: The index object storing access and graph data. :param access: The access object storing raw data. :param delta: Whether to delta compress or just entropy compress. :param _unadded_refs: private parameter, don't use. :param _group_cache: private parameter, don't use. """ self._index = index self._access = access self._delta = delta if _unadded_refs is None: _unadded_refs = {} self._unadded_refs = _unadded_refs if _group_cache is None: _group_cache = LRUSizeCache(max_size=50*1024*1024) self._group_cache = _group_cache self._immediate_fallback_vfs = [] self._max_bytes_to_index = None def without_fallbacks(self): """Return a clone of this object without any fallbacks configured.""" return GroupCompressVersionedFiles(self._index, self._access, self._delta, _unadded_refs=dict(self._unadded_refs), _group_cache=self._group_cache) def add_lines(self, key, parents, lines, parent_texts=None, left_matching_blocks=None, nostore_sha=None, random_id=False, check_content=True): """Add a text to the store. :param key: The key tuple of the text to add. :param parents: The parents key tuples of the text to add. :param lines: A list of lines. Each line must be a bytestring. And all of them except the last must be terminated with \\n and contain no other \\n's. The last line may either contain no \\n's or a single terminating \\n. If the lines list does meet this constraint the add routine may error or may succeed - but you will be unable to read the data back accurately. (Checking the lines have been split correctly is expensive and extremely unlikely to catch bugs so it is not done at runtime unless check_content is True.) :param parent_texts: An optional dictionary containing the opaque representations of some or all of the parents of version_id to allow delta optimisations. 
VERY IMPORTANT: the texts must be those returned by add_lines or data corruption can be caused. :param left_matching_blocks: a hint about which areas are common between the text and its left-hand-parent. The format is the SequenceMatcher.get_matching_blocks format. :param nostore_sha: Raise ExistingContent and do not add the lines to the versioned file if the digest of the lines matches this. :param random_id: If True a random id has been selected rather than an id determined by some deterministic process such as a converter from a foreign VCS. When True the backend may choose not to check for uniqueness of the resulting key within the versioned file, so this should only be done when the result is expected to be unique anyway. :param check_content: If True, the lines supplied are verified to be bytestrings that are correctly formed lines. :return: The text sha1, the number of bytes in the text, and an opaque representation of the inserted version which can be provided back to future add_lines calls in the parent_texts dictionary. """ self._index._check_write_ok() self._check_add(key, lines, random_id, check_content) if parents is None: # The caller might pass None if there is no graph data, but kndx # indexes can't directly store that, so we give them # an empty tuple instead. parents = () # double handling for now. Make it work until then. length = sum(map(len, lines)) record = ChunkedContentFactory(key, parents, None, lines) sha1 = list(self._insert_record_stream([record], random_id=random_id, nostore_sha=nostore_sha))[0] return sha1, length, None def _add_text(self, key, parents, text, nostore_sha=None, random_id=False): """See VersionedFiles._add_text().""" self._index._check_write_ok() self._check_add(key, None, random_id, check_content=False) if text.__class__ is not str: raise errors.BzrBadParameterUnicode("text") if parents is None: # The caller might pass None if there is no graph data, but kndx # indexes can't directly store that, so we give them # an empty tuple instead. parents = () # double handling for now. Make it work until then. length = len(text) record = FulltextContentFactory(key, parents, None, text) sha1 = list(self._insert_record_stream([record], random_id=random_id, nostore_sha=nostore_sha))[0] return sha1, length, None def add_fallback_versioned_files(self, a_versioned_files): """Add a source of texts for texts not present in this knit. :param a_versioned_files: A VersionedFiles object. 
""" self._immediate_fallback_vfs.append(a_versioned_files) def annotate(self, key): """See VersionedFiles.annotate.""" ann = annotate.Annotator(self) return ann.annotate_flat(key) def get_annotator(self): return annotate.Annotator(self) def check(self, progress_bar=None, keys=None): """See VersionedFiles.check().""" if keys is None: keys = self.keys() for record in self.get_record_stream(keys, 'unordered', True): record.get_bytes_as('fulltext') else: return self.get_record_stream(keys, 'unordered', True) def clear_cache(self): """See VersionedFiles.clear_cache()""" self._group_cache.clear() self._index._graph_index.clear_cache() self._index._int_cache.clear() def _check_add(self, key, lines, random_id, check_content): """check that version_id and lines are safe to add.""" version_id = key[-1] if version_id is not None: if osutils.contains_whitespace(version_id): raise errors.InvalidRevisionId(version_id, self) self.check_not_reserved_id(version_id) # TODO: If random_id==False and the key is already present, we should # probably check that the existing content is identical to what is # being inserted, and otherwise raise an exception. This would make # the bundle code simpler. if check_content: self._check_lines_not_unicode(lines) self._check_lines_are_lines(lines) def get_parent_map(self, keys): """Get a map of the graph parents of keys. :param keys: The keys to look up parents for. :return: A mapping from keys to parents. Absent keys are absent from the mapping. """ return self._get_parent_map_with_sources(keys)[0] def _get_parent_map_with_sources(self, keys): """Get a map of the parents of keys. :param keys: The keys to look up parents for. :return: A tuple. The first element is a mapping from keys to parents. Absent keys are absent from the mapping. The second element is a list with the locations each key was found in. The first element is the in-this-knit parents, the second the first fallback source, and so on. """ result = {} sources = [self._index] + self._immediate_fallback_vfs source_results = [] missing = set(keys) for source in sources: if not missing: break new_result = source.get_parent_map(missing) source_results.append(new_result) result.update(new_result) missing.difference_update(set(new_result)) return result, source_results def _get_blocks(self, read_memos): """Get GroupCompressBlocks for the given read_memos. :returns: a series of (read_memo, block) pairs, in the order they were originally passed. """ cached = {} for read_memo in read_memos: try: block = self._group_cache[read_memo] except KeyError: pass else: cached[read_memo] = block not_cached = [] not_cached_seen = set() for read_memo in read_memos: if read_memo in cached: # Don't fetch what we already have continue if read_memo in not_cached_seen: # Don't try to fetch the same data twice continue not_cached.append(read_memo) not_cached_seen.add(read_memo) raw_records = self._access.get_raw_records(not_cached) for read_memo in read_memos: try: yield read_memo, cached[read_memo] except KeyError: # Read the block, and cache it. zdata = raw_records.next() block = GroupCompressBlock.from_bytes(zdata) self._group_cache[read_memo] = block cached[read_memo] = block yield read_memo, block def get_missing_compression_parent_keys(self): """Return the keys of missing compression parents. Missing compression parents occur when a record stream was missing basis texts, or a index was scanned that had missing basis texts. 
""" # GroupCompress cannot currently reference texts that are not in the # group, so this is valid for now return frozenset() def get_record_stream(self, keys, ordering, include_delta_closure): """Get a stream of records for keys. :param keys: The keys to include. :param ordering: Either 'unordered' or 'topological'. A topologically sorted stream has compression parents strictly before their children. :param include_delta_closure: If True then the closure across any compression parents will be included (in the opaque data). :return: An iterator of ContentFactory objects, each of which is only valid until the iterator is advanced. """ # keys might be a generator orig_keys = list(keys) keys = set(keys) if not keys: return if (not self._index.has_graph and ordering in ('topological', 'groupcompress')): # Cannot topological order when no graph has been stored. # but we allow 'as-requested' or 'unordered' ordering = 'unordered' remaining_keys = keys while True: try: keys = set(remaining_keys) for content_factory in self._get_remaining_record_stream(keys, orig_keys, ordering, include_delta_closure): remaining_keys.discard(content_factory.key) yield content_factory return except errors.RetryWithNewPacks, e: self._access.reload_or_raise(e) def _find_from_fallback(self, missing): """Find whatever keys you can from the fallbacks. :param missing: A set of missing keys. This set will be mutated as keys are found from a fallback_vfs :return: (parent_map, key_to_source_map, source_results) parent_map the overall key => parent_keys key_to_source_map a dict from {key: source} source_results a list of (source: keys) """ parent_map = {} key_to_source_map = {} source_results = [] for source in self._immediate_fallback_vfs: if not missing: break source_parents = source.get_parent_map(missing) parent_map.update(source_parents) source_parents = list(source_parents) source_results.append((source, source_parents)) key_to_source_map.update((key, source) for key in source_parents) missing.difference_update(source_parents) return parent_map, key_to_source_map, source_results def _get_ordered_source_keys(self, ordering, parent_map, key_to_source_map): """Get the (source, [keys]) list. The returned objects should be in the order defined by 'ordering', which can weave between different sources. :param ordering: Must be one of 'topological' or 'groupcompress' :return: List of [(source, [keys])] tuples, such that all keys are in the defined order, regardless of source. """ if ordering == 'topological': present_keys = tsort.topo_sort(parent_map) else: # ordering == 'groupcompress' # XXX: This only optimizes for the target ordering. 
We may need # to balance that with the time it takes to extract # ordering, by somehow grouping based on # locations[key][0:3] present_keys = sort_gc_optimal(parent_map) # Now group by source: source_keys = [] current_source = None for key in present_keys: source = key_to_source_map.get(key, self) if source is not current_source: source_keys.append((source, [])) current_source = source source_keys[-1][1].append(key) return source_keys def _get_as_requested_source_keys(self, orig_keys, locations, unadded_keys, key_to_source_map): source_keys = [] current_source = None for key in orig_keys: if key in locations or key in unadded_keys: source = self elif key in key_to_source_map: source = key_to_source_map[key] else: # absent continue if source is not current_source: source_keys.append((source, [])) current_source = source source_keys[-1][1].append(key) return source_keys def _get_io_ordered_source_keys(self, locations, unadded_keys, source_result): def get_group(key): # This is the group the bytes are stored in, followed by the # location in the group return locations[key][0] present_keys = sorted(locations.iterkeys(), key=get_group) # We don't have an ordering for keys in the in-memory object, but # lets process the in-memory ones first. present_keys = list(unadded_keys) + present_keys # Now grab all of the ones from other sources source_keys = [(self, present_keys)] source_keys.extend(source_result) return source_keys def _get_remaining_record_stream(self, keys, orig_keys, ordering, include_delta_closure): """Get a stream of records for keys. :param keys: The keys to include. :param ordering: one of 'unordered', 'topological', 'groupcompress' or 'as-requested' :param include_delta_closure: If True then the closure across any compression parents will be included (in the opaque data). :return: An iterator of ContentFactory objects, each of which is only valid until the iterator is advanced. """ # Cheap: iterate locations = self._index.get_build_details(keys) unadded_keys = set(self._unadded_refs).intersection(keys) missing = keys.difference(locations) missing.difference_update(unadded_keys) (fallback_parent_map, key_to_source_map, source_result) = self._find_from_fallback(missing) if ordering in ('topological', 'groupcompress'): # would be better to not globally sort initially but instead # start with one key, recurse to its oldest parent, then grab # everything in the same group, etc. parent_map = dict((key, details[2]) for key, details in locations.iteritems()) for key in unadded_keys: parent_map[key] = self._unadded_refs[key] parent_map.update(fallback_parent_map) source_keys = self._get_ordered_source_keys(ordering, parent_map, key_to_source_map) elif ordering == 'as-requested': source_keys = self._get_as_requested_source_keys(orig_keys, locations, unadded_keys, key_to_source_map) else: # We want to yield the keys in a semi-optimal (read-wise) ordering. # Otherwise we thrash the _group_cache and destroy performance source_keys = self._get_io_ordered_source_keys(locations, unadded_keys, source_result) for key in missing: yield AbsentContentFactory(key) # Batch up as many keys as we can until either: # - we encounter an unadded ref, or # - we run out of keys, or # - the total bytes to retrieve for this batch > BATCH_SIZE batcher = _BatchingBlockFetcher(self, locations, get_compressor_settings=self._get_compressor_settings) for source, keys in source_keys: if source is self: for key in keys: if key in self._unadded_refs: # Flush batch, then yield unadded ref from # self._compressor. 
for factory in batcher.yield_factories(full_flush=True): yield factory bytes, sha1 = self._compressor.extract(key) parents = self._unadded_refs[key] yield FulltextContentFactory(key, parents, sha1, bytes) continue if batcher.add_key(key) > BATCH_SIZE: # Ok, this batch is big enough. Yield some results. for factory in batcher.yield_factories(): yield factory else: for factory in batcher.yield_factories(full_flush=True): yield factory for record in source.get_record_stream(keys, ordering, include_delta_closure): yield record for factory in batcher.yield_factories(full_flush=True): yield factory def get_sha1s(self, keys): """See VersionedFiles.get_sha1s().""" result = {} for record in self.get_record_stream(keys, 'unordered', True): if record.sha1 != None: result[record.key] = record.sha1 else: if record.storage_kind != 'absent': result[record.key] = osutils.sha_string( record.get_bytes_as('fulltext')) return result def insert_record_stream(self, stream): """Insert a record stream into this container. :param stream: A stream of records to insert. :return: None :seealso VersionedFiles.get_record_stream: """ # XXX: Setting random_id=True makes # test_insert_record_stream_existing_keys fail for groupcompress and # groupcompress-nograph, this needs to be revisited while addressing # 'bzr branch' performance issues. for _ in self._insert_record_stream(stream, random_id=False): pass def _get_compressor_settings(self): if self._max_bytes_to_index is None: # TODO: VersionedFiles don't know about their containing # repository, so they don't have much of an idea about their # location. So for now, this is only a global option. c = config.GlobalConfig() val = c.get_user_option('bzr.groupcompress.max_bytes_to_index') if val is not None: try: val = int(val) except ValueError, e: trace.warning('Value for ' '"bzr.groupcompress.max_bytes_to_index"' ' %r is not an integer' % (val,)) val = None if val is None: val = self._DEFAULT_MAX_BYTES_TO_INDEX self._max_bytes_to_index = val return {'max_bytes_to_index': self._max_bytes_to_index} def _make_group_compressor(self): return GroupCompressor(self._get_compressor_settings()) def _insert_record_stream(self, stream, random_id=False, nostore_sha=None, reuse_blocks=True): """Internal core to insert a record stream into this container. This helper function has a different interface than insert_record_stream to allow add_lines to be minimal, but still return the needed data. :param stream: A stream of records to insert. :param nostore_sha: If the sha1 of a given text matches nostore_sha, raise ExistingContent, rather than committing the new text. :param reuse_blocks: If the source is streaming from groupcompress-blocks, just insert the blocks as-is, rather than expanding the texts and inserting again. :return: An iterator over the sha1 of the inserted records. :seealso insert_record_stream: :seealso add_lines: """ adapters = {} def get_adapter(adapter_key): try: return adapters[adapter_key] except KeyError: adapter_factory = adapter_registry.get(adapter_key) adapter = adapter_factory(self) adapters[adapter_key] = adapter return adapter # This will go up to fulltexts for gc to gc fetching, which isn't # ideal. 
self._compressor = self._make_group_compressor() self._unadded_refs = {} keys_to_add = [] def flush(): bytes_len, chunks = self._compressor.flush().to_chunks() self._compressor = self._make_group_compressor() # Note: At this point we still have 1 copy of the fulltext (in # record and the var 'bytes'), and this generates 2 copies of # the compressed text (one for bytes, one in chunks) # TODO: Push 'chunks' down into the _access api, so that we don't # have to double compressed memory here # TODO: Figure out how to indicate that we would be happy to free # the fulltext content at this point. Note that sometimes we # will want it later (streaming CHK pages), but most of the # time we won't (everything else) bytes = ''.join(chunks) del chunks index, start, length = self._access.add_raw_records( [(None, len(bytes))], bytes)[0] nodes = [] for key, reads, refs in keys_to_add: nodes.append((key, "%d %d %s" % (start, length, reads), refs)) self._index.add_records(nodes, random_id=random_id) self._unadded_refs = {} del keys_to_add[:] last_prefix = None max_fulltext_len = 0 max_fulltext_prefix = None insert_manager = None block_start = None block_length = None # XXX: TODO: remove this, it is just for safety checking for now inserted_keys = set() reuse_this_block = reuse_blocks for record in stream: # Raise an error when a record is missing. if record.storage_kind == 'absent': raise errors.RevisionNotPresent(record.key, self) if random_id: if record.key in inserted_keys: trace.note(gettext('Insert claimed random_id=True,' ' but then inserted %r two times'), record.key) continue inserted_keys.add(record.key) if reuse_blocks: # If the reuse_blocks flag is set, check to see if we can just # copy a groupcompress block as-is. # We only check on the first record (groupcompress-block) not # on all of the (groupcompress-block-ref) entries. # The reuse_this_block flag is then kept for as long as if record.storage_kind == 'groupcompress-block': # Check to see if we really want to re-use this block insert_manager = record._manager reuse_this_block = insert_manager.check_is_well_utilized() else: reuse_this_block = False if reuse_this_block: # We still want to reuse this block if record.storage_kind == 'groupcompress-block': # Insert the raw block into the target repo insert_manager = record._manager bytes = record._manager._block.to_bytes() _, start, length = self._access.add_raw_records( [(None, len(bytes))], bytes)[0] del bytes block_start = start block_length = length if record.storage_kind in ('groupcompress-block', 'groupcompress-block-ref'): if insert_manager is None: raise AssertionError('No insert_manager set') if insert_manager is not record._manager: raise AssertionError('insert_manager does not match' ' the current record, we cannot be positive' ' that the appropriate content was inserted.' 
) value = "%d %d %d %d" % (block_start, block_length, record._start, record._end) nodes = [(record.key, value, (record.parents,))] # TODO: Consider buffering up many nodes to be added, not # sure how much overhead this has, but we're seeing # ~23s / 120s in add_records calls self._index.add_records(nodes, random_id=random_id) continue try: bytes = record.get_bytes_as('fulltext') except errors.UnavailableRepresentation: adapter_key = record.storage_kind, 'fulltext' adapter = get_adapter(adapter_key) bytes = adapter.get_bytes(record) if len(record.key) > 1: prefix = record.key[0] soft = (prefix == last_prefix) else: prefix = None soft = False if max_fulltext_len < len(bytes): max_fulltext_len = len(bytes) max_fulltext_prefix = prefix (found_sha1, start_point, end_point, type) = self._compressor.compress(record.key, bytes, record.sha1, soft=soft, nostore_sha=nostore_sha) # delta_ratio = float(len(bytes)) / (end_point - start_point) # Check if we want to continue to include that text if (prefix == max_fulltext_prefix and end_point < 2 * max_fulltext_len): # As long as we are on the same file_id, we will fill at least # 2 * max_fulltext_len start_new_block = False elif end_point > 4*1024*1024: start_new_block = True elif (prefix is not None and prefix != last_prefix and end_point > 2*1024*1024): start_new_block = True else: start_new_block = False last_prefix = prefix if start_new_block: self._compressor.pop_last() flush() max_fulltext_len = len(bytes) (found_sha1, start_point, end_point, type) = self._compressor.compress(record.key, bytes, record.sha1) if record.key[-1] is None: key = record.key[:-1] + ('sha1:' + found_sha1,) else: key = record.key self._unadded_refs[key] = record.parents yield found_sha1 as_st = static_tuple.StaticTuple.from_sequence if record.parents is not None: parents = as_st([as_st(p) for p in record.parents]) else: parents = None refs = static_tuple.StaticTuple(parents) keys_to_add.append((key, '%d %d' % (start_point, end_point), refs)) if len(keys_to_add): flush() self._compressor = None def iter_lines_added_or_present_in_keys(self, keys, pb=None): """Iterate over the lines in the versioned files from keys. This may return lines from other keys. Each item the returned iterator yields is a tuple of a line and a text version that that line is present in (not introduced in). Ordering of results is in whatever order is most suitable for the underlying storage format. If a progress bar is supplied, it may be used to indicate progress. The caller is responsible for cleaning up progress bars (because this is an iterator). NOTES: * Lines are normalised by the underlying store: they will all have \n terminators. * Lines are returned in arbitrary order. :return: An iterator over (line, key). """ keys = set(keys) total = len(keys) # we don't care about inclusions, the caller cares. # but we need to setup a list of records to visit. # we need key, position, length for key_idx, record in enumerate(self.get_record_stream(keys, 'unordered', True)): # XXX: todo - optimise to use less than full texts. 
key = record.key if pb is not None: pb.update('Walking content', key_idx, total) if record.storage_kind == 'absent': raise errors.RevisionNotPresent(key, self) lines = osutils.split_lines(record.get_bytes_as('fulltext')) for line in lines: yield line, key if pb is not None: pb.update('Walking content', total, total) def keys(self): """See VersionedFiles.keys.""" if 'evil' in debug.debug_flags: trace.mutter_callsite(2, "keys scales with size of history") sources = [self._index] + self._immediate_fallback_vfs result = set() for source in sources: result.update(source.keys()) return result class _GCBuildDetails(object): """A blob of data about the build details. This stores the minimal data, which then allows compatibility with the old api, without taking as much memory. """ __slots__ = ('_index', '_group_start', '_group_end', '_basis_end', '_delta_end', '_parents') method = 'group' compression_parent = None def __init__(self, parents, position_info): self._parents = parents (self._index, self._group_start, self._group_end, self._basis_end, self._delta_end) = position_info def __repr__(self): return '%s(%s, %s)' % (self.__class__.__name__, self.index_memo, self._parents) @property def index_memo(self): return (self._index, self._group_start, self._group_end, self._basis_end, self._delta_end) @property def record_details(self): return static_tuple.StaticTuple(self.method, None) def __getitem__(self, offset): """Compatibility thunk to act like a tuple.""" if offset == 0: return self.index_memo elif offset == 1: return self.compression_parent # Always None elif offset == 2: return self._parents elif offset == 3: return self.record_details else: raise IndexError('offset out of range') def __len__(self): return 4 class _GCGraphIndex(object): """Mapper from GroupCompressVersionedFiles needs into GraphIndex storage.""" def __init__(self, graph_index, is_locked, parents=True, add_callback=None, track_external_parent_refs=False, inconsistency_fatal=True, track_new_keys=False): """Construct a _GCGraphIndex on a graph_index. :param graph_index: An implementation of bzrlib.index.GraphIndex. :param is_locked: A callback, returns True if the index is locked and thus usable. :param parents: If True, record knits parents, if not do not record parents. :param add_callback: If not None, allow additions to the index and call this callback with a list of added GraphIndex nodes: [(node, value, node_refs), ...] :param track_external_parent_refs: As keys are added, keep track of the keys they reference, so that we can query get_missing_parents(), etc. :param inconsistency_fatal: When asked to add records that are already present, and the details are inconsistent with the existing record, raise an exception instead of warning (and skipping the record). """ self._add_callback = add_callback self._graph_index = graph_index self._parents = parents self.has_graph = parents self._is_locked = is_locked self._inconsistency_fatal = inconsistency_fatal # GroupCompress records tend to have the same 'group' start + offset # repeated over and over, this creates a surplus of ints self._int_cache = {} if track_external_parent_refs: self._key_dependencies = _KeyRefs( track_new_keys=track_new_keys) else: self._key_dependencies = None def add_records(self, records, random_id=False): """Add multiple records to the index. 
This function does not insert data into the Immutable GraphIndex backing the KnitGraphIndex, instead it prepares data for insertion by the caller and checks that it is safe to insert then calls self._add_callback with the prepared GraphIndex nodes. :param records: a list of tuples: (key, options, access_memo, parents). :param random_id: If True the ids being added were randomly generated and no check for existence will be performed. """ if not self._add_callback: raise errors.ReadOnlyError(self) # we hope there are no repositories with inconsistent parentage # anymore. changed = False keys = {} for (key, value, refs) in records: if not self._parents: if refs: for ref in refs: if ref: raise errors.KnitCorrupt(self, "attempt to add node with parents " "in parentless index.") refs = () changed = True keys[key] = (value, refs) # check for dups if not random_id: present_nodes = self._get_entries(keys) for (index, key, value, node_refs) in present_nodes: # Sometimes these are passed as a list rather than a tuple node_refs = static_tuple.as_tuples(node_refs) passed = static_tuple.as_tuples(keys[key]) if node_refs != passed[1]: details = '%s %s %s' % (key, (value, node_refs), passed) if self._inconsistency_fatal: raise errors.KnitCorrupt(self, "inconsistent details" " in add_records: %s" % details) else: trace.warning("inconsistent details in skipped" " record: %s", details) del keys[key] changed = True if changed: result = [] if self._parents: for key, (value, node_refs) in keys.iteritems(): result.append((key, value, node_refs)) else: for key, (value, node_refs) in keys.iteritems(): result.append((key, value)) records = result key_dependencies = self._key_dependencies if key_dependencies is not None: if self._parents: for key, value, refs in records: parents = refs[0] key_dependencies.add_references(key, parents) else: for key, value, refs in records: new_keys.add_key(key) self._add_callback(records) def _check_read(self): """Raise an exception if reads are not permitted.""" if not self._is_locked(): raise errors.ObjectNotLocked(self) def _check_write_ok(self): """Raise an exception if writes are not permitted.""" if not self._is_locked(): raise errors.ObjectNotLocked(self) def _get_entries(self, keys, check_present=False): """Get the entries for keys. Note: Callers are responsible for checking that the index is locked before calling this method. :param keys: An iterable of index key tuples. """ keys = set(keys) found_keys = set() if self._parents: for node in self._graph_index.iter_entries(keys): yield node found_keys.add(node[1]) else: # adapt parentless index to the rest of the code. for node in self._graph_index.iter_entries(keys): yield node[0], node[1], node[2], () found_keys.add(node[1]) if check_present: missing_keys = keys.difference(found_keys) if missing_keys: raise errors.RevisionNotPresent(missing_keys.pop(), self) def find_ancestry(self, keys): """See CombinedGraphIndex.find_ancestry""" return self._graph_index.find_ancestry(keys, 0) def get_parent_map(self, keys): """Get a map of the parents of keys. :param keys: The keys to look up parents for. :return: A mapping from keys to parents. Absent keys are absent from the mapping. 
""" self._check_read() nodes = self._get_entries(keys) result = {} if self._parents: for node in nodes: result[node[1]] = node[3][0] else: for node in nodes: result[node[1]] = None return result def get_missing_parents(self): """Return the keys of missing parents.""" # Copied from _KnitGraphIndex.get_missing_parents # We may have false positives, so filter those out. self._key_dependencies.satisfy_refs_for_keys( self.get_parent_map(self._key_dependencies.get_unsatisfied_refs())) return frozenset(self._key_dependencies.get_unsatisfied_refs()) def get_build_details(self, keys): """Get the various build details for keys. Ghosts are omitted from the result. :param keys: An iterable of keys. :return: A dict of key: (index_memo, compression_parent, parents, record_details). * index_memo: opaque structure to pass to read_records to extract the raw data * compression_parent: Content that this record is built upon, may be None * parents: Logical parents of this node * record_details: extra information about the content which needs to be passed to Factory.parse_record """ self._check_read() result = {} entries = self._get_entries(keys) for entry in entries: key = entry[1] if not self._parents: parents = None else: parents = entry[3][0] details = _GCBuildDetails(parents, self._node_to_position(entry)) result[key] = details return result def keys(self): """Get all the keys in the collection. The keys are not ordered. """ self._check_read() return [node[1] for node in self._graph_index.iter_all_entries()] def _node_to_position(self, node): """Convert an index value to position details.""" bits = node[2].split(' ') # It would be nice not to read the entire gzip. # start and stop are put into _int_cache because they are very common. # They define the 'group' that an entry is in, and many groups can have # thousands of objects. # Branching Launchpad, for example, saves ~600k integers, at 12 bytes # each, or about 7MB. Note that it might be even more when you consider # how PyInt is allocated in separate slabs. And you can't return a slab # to the OS if even 1 int on it is in use. Note though that Python uses # a LIFO when re-using PyInt slots, which might cause more # fragmentation. start = int(bits[0]) start = self._int_cache.setdefault(start, start) stop = int(bits[1]) stop = self._int_cache.setdefault(stop, stop) basis_end = int(bits[2]) delta_end = int(bits[3]) # We can't use StaticTuple here, because node[0] is a BTreeGraphIndex # instance... return (node[0], start, stop, basis_end, delta_end) def scan_unvalidated_index(self, graph_index): """Inform this _GCGraphIndex that there is an unvalidated index. This allows this _GCGraphIndex to keep track of any missing compression parents we may want to have filled in to make those indices valid. It also allows _GCGraphIndex to track any new keys. :param graph_index: A GraphIndex """ key_dependencies = self._key_dependencies if key_dependencies is None: return for node in graph_index.iter_all_entries(): # Add parent refs from graph_index (and discard parent refs # that the graph_index has). 
key_dependencies.add_references(node[1], node[3][0]) from bzrlib._groupcompress_py import ( apply_delta, apply_delta_to_source, encode_base128_int, decode_base128_int, decode_copy_instruction, LinesDeltaIndex, ) try: from bzrlib._groupcompress_pyx import ( apply_delta, apply_delta_to_source, DeltaIndex, encode_base128_int, decode_base128_int, ) GroupCompressor = PyrexGroupCompressor except ImportError, e: osutils.failed_to_load_extension(e) GroupCompressor = PythonGroupCompressor bzr-2.7.0/bzrlib/hashcache.py0000644000000000000000000002641111673403246014222 0ustar 00000000000000# Copyright (C) 2005-2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import # TODO: Up-front, stat all files in order and remove those which are deleted or # out-of-date. Don't actually re-read them until they're needed. That ought # to bring all the inodes into core so that future stats to them are fast, and # it preserves the nice property that any caller will always get up-to-date # data except in unavoidable cases. # TODO: Perhaps return more details on the file to avoid statting it # again: nonexistent, file type, size, etc # TODO: Perhaps use a Python pickle instead of a text file; might be faster. CACHE_HEADER = "### bzr hashcache v5\n" import os import stat import time from bzrlib import ( atomicfile, errors, filters as _mod_filters, osutils, trace, ) FP_MTIME_COLUMN = 1 FP_CTIME_COLUMN = 2 FP_MODE_COLUMN = 5 class HashCache(object): """Cache for looking up file SHA-1. Files are considered to match the cached value if the fingerprint of the file has not changed. This includes its mtime, ctime, device number, inode number, and size. This should catch modifications or replacement of the file by a new one. This may not catch modifications that do not change the file's size and that occur within the resolution window of the timestamps. To handle this we specifically do not cache files which have changed since the start of the present second, since they could undetectably change again. This scheme may fail if the machine's clock steps backwards. Don't do that. This does not canonicalize the paths passed in; that should be done by the caller. _cache Indexed by path, points to a two-tuple of the SHA-1 of the file. and its fingerprint. stat_count number of times files have been statted hit_count number of times files have been retrieved from the cache, avoiding a re-read miss_count number of misses (times files have been completely re-read) """ needs_write = False def __init__(self, root, cache_file_name, mode=None, content_filter_stack_provider=None): """Create a hash cache in base dir, and set the file mode to mode. :param content_filter_stack_provider: a function that takes a path (relative to the top of the tree) and a file-id as parameters and returns a stack of ContentFilters. 
If None, no content filtering is performed. """ self.root = osutils.safe_unicode(root) self.root_utf8 = self.root.encode('utf8') # where is the filesystem encoding ? self.hit_count = 0 self.miss_count = 0 self.stat_count = 0 self.danger_count = 0 self.removed_count = 0 self.update_count = 0 self._cache = {} self._mode = mode self._cache_file_name = osutils.safe_unicode(cache_file_name) self._filter_provider = content_filter_stack_provider def cache_file_name(self): return self._cache_file_name def clear(self): """Discard all cached information. This does not reset the counters.""" if self._cache: self.needs_write = True self._cache = {} def scan(self): """Scan all files and remove entries where the cache entry is obsolete. Obsolete entries are those where the file has been modified or deleted since the entry was inserted. """ # FIXME optimisation opportunity, on linux [and check other oses]: # rather than iteritems order, stat in inode order. prep = [(ce[1][3], path, ce) for (path, ce) in self._cache.iteritems()] prep.sort() for inum, path, cache_entry in prep: abspath = osutils.pathjoin(self.root, path) fp = self._fingerprint(abspath) self.stat_count += 1 cache_fp = cache_entry[1] if (not fp) or (cache_fp != fp): # not here or not a regular file anymore self.removed_count += 1 self.needs_write = True del self._cache[path] def get_sha1(self, path, stat_value=None): """Return the sha1 of a file. """ if path.__class__ is str: abspath = osutils.pathjoin(self.root_utf8, path) else: abspath = osutils.pathjoin(self.root, path) self.stat_count += 1 file_fp = self._fingerprint(abspath, stat_value) if not file_fp: # not a regular file or not existing if path in self._cache: self.removed_count += 1 self.needs_write = True del self._cache[path] return None if path in self._cache: cache_sha1, cache_fp = self._cache[path] else: cache_sha1, cache_fp = None, None if cache_fp == file_fp: ## mutter("hashcache hit for %s %r -> %s", path, file_fp, cache_sha1) ## mutter("now = %s", time.time()) self.hit_count += 1 return cache_sha1 self.miss_count += 1 mode = file_fp[FP_MODE_COLUMN] if stat.S_ISREG(mode): if self._filter_provider is None: filters = [] else: filters = self._filter_provider(path=path, file_id=None) digest = self._really_sha1_file(abspath, filters) elif stat.S_ISLNK(mode): target = osutils.readlink(osutils.safe_unicode(abspath)) digest = osutils.sha_string(target.encode('UTF-8')) else: raise errors.BzrError("file %r: unknown file stat mode: %o" % (abspath, mode)) # window of 3 seconds to allow for 2s resolution on windows, # unsynchronized file servers, etc. cutoff = self._cutoff_time() if file_fp[FP_MTIME_COLUMN] >= cutoff \ or file_fp[FP_CTIME_COLUMN] >= cutoff: # changed too recently; can't be cached. we can # return the result and it could possibly be cached # next time. # # the point is that we only want to cache when we are sure that any # subsequent modifications of the file can be detected. If a # modification neither changes the inode, the device, the size, nor # the mode, then we can only distinguish it by time; therefore we # need to let sufficient time elapse before we may cache this entry # again. If we didn't do this, then, for example, a very quick 1 # byte replacement in the file might go undetected. 
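# Illustrative aside (not part of bzrlib): a minimal round trip through the
# HashCache API described in the class docstring above.  The function name and
# the cache file location used here are invented for the example; real callers
# (the working tree code) keep a single long-lived instance and choose their
# own cache path.
from bzrlib.hashcache import HashCache

def cached_sha1(tree_root, relpath):
    """Sketch: return the SHA-1 of tree_root/relpath via the stat cache."""
    hc = HashCache(tree_root, tree_root + '/.bzrtmp-stat-cache')
    hc.read()                      # load previously written fingerprints, if any
    digest = hc.get_sha1(relpath)  # cache hit when the fingerprint still matches
    if hc.needs_write:
        hc.write()                 # persist new or changed entries atomically
    return digest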
## mutter('%r modified too recently; not caching', path) self.danger_count += 1 if cache_fp: self.removed_count += 1 self.needs_write = True del self._cache[path] else: ## mutter('%r added to cache: now=%f, mtime=%d, ctime=%d', ## path, time.time(), file_fp[FP_MTIME_COLUMN], ## file_fp[FP_CTIME_COLUMN]) self.update_count += 1 self.needs_write = True self._cache[path] = (digest, file_fp) return digest def _really_sha1_file(self, abspath, filters): """Calculate the SHA1 of a file by reading the full text""" return _mod_filters.internal_size_sha_file_byname(abspath, filters)[1] def write(self): """Write contents of cache to file.""" outf = atomicfile.AtomicFile(self.cache_file_name(), 'wb', new_mode=self._mode) try: outf.write(CACHE_HEADER) for path, c in self._cache.iteritems(): line_info = [path.encode('utf-8'), '// ', c[0], ' '] line_info.append(' '.join([str(fld) for fld in c[1]])) line_info.append('\n') outf.write(''.join(line_info)) outf.commit() self.needs_write = False ## mutter("write hash cache: %s hits=%d misses=%d stat=%d recent=%d updates=%d", ## self.cache_file_name(), self.hit_count, self.miss_count, ## self.stat_count, ## self.danger_count, self.update_count) finally: outf.close() def read(self): """Reinstate cache from file. Overwrites existing cache. If the cache file has the wrong version marker, this just clears the cache.""" self._cache = {} fn = self.cache_file_name() try: inf = file(fn, 'rb', buffering=65000) except IOError, e: trace.mutter("failed to open %s: %s", fn, e) # better write it now so it is valid self.needs_write = True return hdr = inf.readline() if hdr != CACHE_HEADER: trace.mutter('cache header marker not found at top of %s;' ' discarding cache', fn) self.needs_write = True return for l in inf: pos = l.index('// ') path = l[:pos].decode('utf-8') if path in self._cache: trace.warning('duplicated path %r in cache' % path) continue pos += 3 fields = l[pos:].split(' ') if len(fields) != 7: trace.warning("bad line in hashcache: %r" % l) continue sha1 = fields[0] if len(sha1) != 40: trace.warning("bad sha1 in hashcache: %r" % sha1) continue fp = tuple(map(long, fields[1:])) self._cache[path] = (sha1, fp) # GZ 2009-09-20: Should really use a try/finally block to ensure close inf.close() self.needs_write = False def _cutoff_time(self): """Return cutoff time. Files modified more recently than this time are at risk of being undetectably modified and so can't be cached. """ return int(time.time()) - 3 def _fingerprint(self, abspath, stat_value=None): if stat_value is None: try: stat_value = os.lstat(abspath) except OSError: # might be missing, etc return None if stat.S_ISDIR(stat_value.st_mode): return None # we discard any high precision because it's not reliable; perhaps we # could do better on some systems? return (stat_value.st_size, long(stat_value.st_mtime), long(stat_value.st_ctime), stat_value.st_ino, stat_value.st_dev, stat_value.st_mode) bzr-2.7.0/bzrlib/help.py0000644000000000000000000001352111722672034013237 0ustar 00000000000000# Copyright (C) 2005-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import # TODO: Some way to get a list of external commands (defined by shell # scripts) so that they can be included in the help listing as well. # It should be enough to just list the plugin directory and look for # executable files with reasonable names. # TODO: `help commands --all` should show hidden commands import sys from bzrlib import ( commands as _mod_commands, errors, help_topics, osutils, plugin, ui, utextwrap, ) def help(topic=None, outfile=None): """Write the help for the specific topic to outfile""" if outfile is None: outfile = ui.ui_factory.make_output_stream() indices = HelpIndices() alias = _mod_commands.get_alias(topic) try: topics = indices.search(topic) shadowed_terms = [] for index, topic_obj in topics[1:]: shadowed_terms.append('%s%s' % (index.prefix, topic_obj.get_help_topic())) source = topics[0][1] outfile.write(source.get_help_text(shadowed_terms)) except errors.NoHelpTopic: if alias is None: raise if alias is not None: outfile.write("'bzr %s' is an alias for 'bzr %s'.\n" % (topic, " ".join(alias))) def help_commands(outfile=None): """List all commands""" if outfile is None: outfile = ui.ui_factory.make_output_stream() outfile.write(_help_commands_to_text('commands')) def _help_commands_to_text(topic): """Generate the help text for the list of commands""" out = [] if topic == 'hidden-commands': hidden = True else: hidden = False names = list(_mod_commands.all_command_names()) commands = ((n, _mod_commands.get_cmd_object(n)) for n in names) shown_commands = [(n, o) for n, o in commands if o.hidden == hidden] max_name = max(len(n) for n, o in shown_commands) indent = ' ' * (max_name + 1) width = osutils.terminal_width() if width is None: width = osutils.default_terminal_width # we need one extra space for terminals that wrap on last char width = width - 1 for cmd_name, cmd_object in sorted(shown_commands): plugin_name = cmd_object.plugin_name() if plugin_name is None: plugin_name = '' else: plugin_name = ' [%s]' % plugin_name cmd_help = cmd_object.help() if cmd_help: firstline = cmd_help.split('\n', 1)[0] else: firstline = '' helpstring = '%-*s %s%s' % (max_name, cmd_name, firstline, plugin_name) lines = utextwrap.wrap( helpstring, subsequent_indent=indent, width=width, break_long_words=False) for line in lines: out.append(line + '\n') return ''.join(out) help_topics.topic_registry.register("commands", _help_commands_to_text, "Basic help for all commands", help_topics.SECT_HIDDEN) help_topics.topic_registry.register("hidden-commands", _help_commands_to_text, "All hidden commands", help_topics.SECT_HIDDEN) class HelpIndices(object): """Maintainer of help topics across multiple indices. It is currently separate to the HelpTopicRegistry because of its ordered nature, but possibly we should instead structure it as a search within the registry and add ordering and searching facilities to the registry. The registry would probably need to be restructured to support that cleanly which is why this has been implemented in parallel even though it does as a result permit searching for help in indices which are not discoverable via 'help topics'. Each index has a unique prefix string, such as "commands", and contains help topics which can be listed or searched. 
""" def __init__(self): self.search_path = [ help_topics.HelpTopicIndex(), _mod_commands.HelpCommandIndex(), plugin.PluginsHelpIndex(), help_topics.ConfigOptionHelpIndex(), ] def _check_prefix_uniqueness(self): """Ensure that the index collection is able to differentiate safely.""" prefixes = {} for index in self.search_path: prefixes.setdefault(index.prefix, []).append(index) for prefix, indices in prefixes.items(): if len(indices) > 1: raise errors.DuplicateHelpPrefix(prefix) def search(self, topic): """Search for topic across the help search path. :param topic: A string naming the help topic to search for. :raises: NoHelpTopic if none of the indexs in search_path have topic. :return: A list of HelpTopics which matched 'topic'. """ self._check_prefix_uniqueness() result = [] for index in self.search_path: result.extend([(index, _topic) for _topic in index.get_topics(topic)]) if not result: raise errors.NoHelpTopic(topic) else: return result bzr-2.7.0/bzrlib/help_topics/0000755000000000000000000000000010727370342014245 5ustar 00000000000000bzr-2.7.0/bzrlib/hooks.py0000644000000000000000000004106711730054002013424 0ustar 00000000000000# Copyright (C) 2007-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Support for plugin hooking logic.""" from __future__ import absolute_import from bzrlib import ( registry, symbol_versioning, ) from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ import textwrap from bzrlib import ( _format_version_tuple, errors, pyutils, ) from bzrlib.i18n import gettext """) class KnownHooksRegistry(registry.Registry): # known_hooks registry contains # tuple of (module, member name) which is the hook point # module where the specific hooks are defined # callable to get the empty specific Hooks for that attribute def register_lazy_hook(self, hook_module_name, hook_member_name, hook_factory_member_name): self.register_lazy((hook_module_name, hook_member_name), hook_module_name, hook_factory_member_name) def iter_parent_objects(self): """Yield (hook_key, (parent_object, attr)) tuples for every registered hook, where 'parent_object' is the object that holds the hook instance. This is useful for resetting/restoring all the hooks to a known state, as is done in bzrlib.tests.TestCase._clear_hooks. """ for key in self.keys(): yield key, self.key_to_parent_and_attribute(key) def key_to_parent_and_attribute(self, (module_name, member_name)): """Convert a known_hooks key to a (parent_obj, attr) pair. :param key: A tuple (module_name, member_name) as found in the keys of the known_hooks registry. :return: The parent_object of the hook and the name of the attribute on that parent object where the hook is kept. 
""" parent_mod, parent_member, attr = pyutils.calc_parent_name(module_name, member_name) return pyutils.get_named_object(parent_mod, parent_member), attr _builtin_known_hooks = ( ('bzrlib.branch', 'Branch.hooks', 'BranchHooks'), ('bzrlib.controldir', 'ControlDir.hooks', 'ControlDirHooks'), ('bzrlib.commands', 'Command.hooks', 'CommandHooks'), ('bzrlib.config', 'ConfigHooks', '_ConfigHooks'), ('bzrlib.info', 'hooks', 'InfoHooks'), ('bzrlib.lock', 'Lock.hooks', 'LockHooks'), ('bzrlib.merge', 'Merger.hooks', 'MergeHooks'), ('bzrlib.msgeditor', 'hooks', 'MessageEditorHooks'), ('bzrlib.mutabletree', 'MutableTree.hooks', 'MutableTreeHooks'), ('bzrlib.smart.client', '_SmartClient.hooks', 'SmartClientHooks'), ('bzrlib.smart.server', 'SmartTCPServer.hooks', 'SmartServerHooks'), ('bzrlib.status', 'hooks', 'StatusHooks'), ('bzrlib.transport', 'Transport.hooks', 'TransportHooks'), ('bzrlib.version_info_formats.format_rio', 'RioVersionInfoBuilder.hooks', 'RioVersionInfoBuilderHooks'), ('bzrlib.merge_directive', 'BaseMergeDirective.hooks', 'MergeDirectiveHooks'), ) known_hooks = KnownHooksRegistry() for (_hook_module, _hook_attribute, _hook_class) in _builtin_known_hooks: known_hooks.register_lazy_hook(_hook_module, _hook_attribute, _hook_class) del _builtin_known_hooks, _hook_module, _hook_attribute, _hook_class def known_hooks_key_to_object((module_name, member_name)): """Convert a known_hooks key to a object. :param key: A tuple (module_name, member_name) as found in the keys of the known_hooks registry. :return: The object this specifies. """ return pyutils.get_named_object(module_name, member_name) class Hooks(dict): """A dictionary mapping hook name to a list of callables. e.g. ['FOO'] Is the list of items to be called when the FOO hook is triggered. """ def __init__(self, module=None, member_name=None): """Create a new hooks dictionary. :param module: The module from which this hooks dictionary should be loaded (used for lazy hooks) :param member_name: Name under which this hooks dictionary should be loaded. (used for lazy hooks) """ dict.__init__(self) self._callable_names = {} self._lazy_callable_names = {} self._module = module self._member_name = member_name def add_hook(self, name, doc, introduced, deprecated=None): """Add a hook point to this dictionary. :param name: The name of the hook, for clients to use when registering. :param doc: The docs for the hook. :param introduced: When the hook was introduced (e.g. (0, 15)). :param deprecated: When the hook was deprecated, None for not-deprecated. """ if name in self: raise errors.DuplicateKey(name) if self._module: callbacks = _lazy_hooks.setdefault( (self._module, self._member_name, name), []) else: callbacks = None hookpoint = HookPoint(name=name, doc=doc, introduced=introduced, deprecated=deprecated, callbacks=callbacks) self[name] = hookpoint def docs(self): """Generate the documentation for this Hooks instance. This introspects all the individual hooks and returns their docs as well. """ hook_names = sorted(self.keys()) hook_docs = [] name = self.__class__.__name__ hook_docs.append(name) hook_docs.append("-"*len(name)) hook_docs.append("") for hook_name in hook_names: hook = self[hook_name] try: hook_docs.append(hook.docs()) except AttributeError: # legacy hook strings = [] strings.append(hook_name) strings.append("~" * len(hook_name)) strings.append("") strings.append("An old-style hook. 
For documentation see the __init__ " "method of '%s'\n" % (name,)) hook_docs.extend(strings) return "\n".join(hook_docs) def get_hook_name(self, a_callable): """Get the name for a_callable for UI display. If no name has been registered, the string 'No hook name' is returned. We use a fixed string rather than repr or the callables module because the code names are rarely meaningful for end users and this is not intended for debugging. """ name = self._callable_names.get(a_callable, None) if name is None and a_callable is not None: name = self._lazy_callable_names.get((a_callable.__module__, a_callable.__name__), None) if name is None: return 'No hook name' return name def install_named_hook_lazy(self, hook_name, callable_module, callable_member, name): """Install a_callable in to the hook hook_name lazily, and label it. :param hook_name: A hook name. See the __init__ method for the complete list of hooks. :param callable_module: Name of the module in which the callable is present. :param callable_member: Member name of the callable. :param name: A name to associate the callable with, to show users what is running. """ try: hook = self[hook_name] except KeyError: raise errors.UnknownHook(self.__class__.__name__, hook_name) try: hook_lazy = getattr(hook, "hook_lazy") except AttributeError: raise errors.UnsupportedOperation(self.install_named_hook_lazy, self) else: hook_lazy(callable_module, callable_member, name) if name is not None: self.name_hook_lazy(callable_module, callable_member, name) def install_named_hook(self, hook_name, a_callable, name): """Install a_callable in to the hook hook_name, and label it name. :param hook_name: A hook name. See the __init__ method for the complete list of hooks. :param a_callable: The callable to be invoked when the hook triggers. The exact signature will depend on the hook - see the __init__ method for details on each hook. :param name: A name to associate a_callable with, to show users what is running. """ try: hook = self[hook_name] except KeyError: raise errors.UnknownHook(self.__class__.__name__, hook_name) try: # list hooks, old-style, not yet deprecated but less useful. hook.append(a_callable) except AttributeError: hook.hook(a_callable, name) if name is not None: self.name_hook(a_callable, name) def uninstall_named_hook(self, hook_name, label): """Uninstall named hooks. :param hook_name: Hook point name :param label: Label of the callable to uninstall """ try: hook = self[hook_name] except KeyError: raise errors.UnknownHook(self.__class__.__name__, hook_name) try: uninstall = getattr(hook, "uninstall") except AttributeError: raise errors.UnsupportedOperation(self.uninstall_named_hook, self) else: uninstall(label) def name_hook(self, a_callable, name): """Associate name with a_callable to show users what is running.""" self._callable_names[a_callable] = name def name_hook_lazy(self, callable_module, callable_member, callable_name): self._lazy_callable_names[(callable_module, callable_member)]= \ callable_name class HookPoint(object): """A single hook that clients can register to be called back when it fires. :ivar name: The name of the hook. :ivar doc: The docs for using the hook. :ivar introduced: A version tuple specifying what version the hook was introduced in. None indicates an unknown version. :ivar deprecated: A version tuple specifying what version the hook was deprecated or superseded in. None indicates that the hook is not superseded or deprecated. 
If the hook is superseded then the doc should describe the recommended replacement hook to register for. """ def __init__(self, name, doc, introduced, deprecated=None, callbacks=None): """Create a HookPoint. :param name: The name of the hook, for clients to use when registering. :param doc: The docs for the hook. :param introduced: When the hook was introduced (e.g. (0, 15)). :param deprecated: When the hook was deprecated, None for not-deprecated. """ self.name = name self.__doc__ = doc self.introduced = introduced self.deprecated = deprecated if callbacks is None: self._callbacks = [] else: self._callbacks = callbacks def docs(self): """Generate the documentation for this HookPoint. :return: A string terminated in \n. """ strings = [] strings.append(self.name) strings.append('~'*len(self.name)) strings.append('') if self.introduced: introduced_string = _format_version_tuple(self.introduced) else: introduced_string = 'unknown' strings.append(gettext('Introduced in: %s') % introduced_string) if self.deprecated: deprecated_string = _format_version_tuple(self.deprecated) strings.append(gettext('Deprecated in: %s') % deprecated_string) strings.append('') strings.extend(textwrap.wrap(self.__doc__, break_long_words=False)) strings.append('') return '\n'.join(strings) def __eq__(self, other): return (type(other) == type(self) and other.__dict__ == self.__dict__) def hook_lazy(self, callback_module, callback_member, callback_label): """Lazily register a callback to be called when this HookPoint fires. :param callback_module: Module of the callable to use when this HookPoint fires. :param callback_member: Member name of the callback. :param callback_label: A label to show in the UI while this callback is processing. """ obj_getter = registry._LazyObjectGetter(callback_module, callback_member) self._callbacks.append((obj_getter, callback_label)) def hook(self, callback, callback_label): """Register a callback to be called when this HookPoint fires. :param callback: The callable to use when this HookPoint fires. :param callback_label: A label to show in the UI while this callback is processing. """ obj_getter = registry._ObjectGetter(callback) self._callbacks.append((obj_getter, callback_label)) def uninstall(self, label): """Uninstall the callback with the specified label. :param label: Label of the entry to uninstall """ entries_to_remove = [] for entry in self._callbacks: (entry_callback, entry_label) = entry if entry_label == label: entries_to_remove.append(entry) if entries_to_remove == []: raise KeyError("No entry with label %r" % label) for entry in entries_to_remove: self._callbacks.remove(entry) def __iter__(self): return (callback.get_obj() for callback, name in self._callbacks) def __len__(self): return len(self._callbacks) def __repr__(self): strings = [] strings.append("<%s(" % type(self).__name__) strings.append(self.name) strings.append("), callbacks=[") callbacks = self._callbacks for (callback, callback_name) in callbacks: strings.append(repr(callback.get_obj())) strings.append("(") strings.append(callback_name) strings.append("),") if len(callbacks) == 1: strings[-1] = ")" strings.append("]>") return ''.join(strings) _help_prefix = \ """ Hooks ===== Introduction ------------ A hook of type *xxx* of class *yyy* needs to be registered using:: yyy.hooks.install_named_hook("xxx", ...) See :doc:`Using hooks<../user-guide/hooks>` in the User Guide for examples. The class that contains each hook is given before the hooks it supplies. 
For instance, BranchHooks as the class is the hooks class for `bzrlib.branch.Branch.hooks`. Each description also indicates whether the hook runs on the client (the machine where bzr was invoked) or the server (the machine addressed by the branch URL). These may be, but are not necessarily, the same machine. Plugins (including hooks) are run on the server if all of these is true: * The connection is via a smart server (accessed with a URL starting with "bzr://", "bzr+ssh://" or "bzr+http://", or accessed via a "http://" URL when a smart server is available via HTTP). * The hook is either server specific or part of general infrastructure rather than client specific code (such as commit). """ def hooks_help_text(topic): segments = [_help_prefix] for hook_key in sorted(known_hooks.keys()): hooks = known_hooks_key_to_object(hook_key) segments.append(hooks.docs()) return '\n'.join(segments) # Lazily registered hooks. Maps (module, name, hook_name) tuples # to lists of tuples with objectgetters and names _lazy_hooks = {} def install_lazy_named_hook(hookpoints_module, hookpoints_name, hook_name, a_callable, name): """Install a callable in to a hook lazily, and label it name. :param hookpoints_module: Module name of the hook points. :param hookpoints_name: Name of the hook points. :param hook_name: A hook name. :param callable: a callable to call for the hook. :param name: A name to associate a_callable with, to show users what is running. """ key = (hookpoints_module, hookpoints_name, hook_name) obj_getter = registry._ObjectGetter(a_callable) _lazy_hooks.setdefault(key, []).append((obj_getter, name)) bzr-2.7.0/bzrlib/i18n.py0000644000000000000000000001401011734374643013070 0ustar 00000000000000# -*- coding: utf-8 -*- # # Copyright (C) 2007 Lukáš Lalinský # Copyright (C) 2007,2009 Alexander Belchenko # Copyright (C) 2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # This module is copied from Bazaar Explorer and modified for bzr. """i18n and l10n support for Bazaar.""" from __future__ import absolute_import import gettext as _gettext import os import sys _translations = None def gettext(message): """Translate message. :returns: translated message as unicode. """ install() return _translations.ugettext(message) def ngettext(singular, plural, number): """Translate message with plural forms based on `number`. :param singular: English language message in singular form :param plural: English language message in plural form :param number: the number this message should be translated for :returns: translated message as unicode. """ install() return _translations.ungettext(singular, plural, number) def N_(msg): """Mark message for translation but don't translate it right away.""" return msg def gettext_per_paragraph(message): """Translate message per paragraph. :returns: concatenated translated message as unicode. 
""" install() paragraphs = message.split(u'\n\n') ugettext = _translations.ugettext # Be careful not to translate the empty string -- it holds the # meta data of the .po file. return u'\n\n'.join(ugettext(p) if p else u'' for p in paragraphs) def disable_i18n(): """Do not allow i18n to be enabled. Useful for third party users of bzrlib.""" global _translations _translations = _gettext.NullTranslations() def installed(): """Returns whether translations are in use or not.""" return _translations is not None def install(lang=None): """Enables gettext translations in bzr.""" global _translations if installed(): return _translations = install_translations(lang) def install_translations(lang=None, domain='bzr', locale_base=None): """Create a gettext translation object. :param lang: language to install. :param domain: translation domain to install. :param locale_base: plugins can specify their own directory. :returns: a gettext translations object to use """ if lang is None: lang = _get_current_locale() if lang is not None: languages = lang.split(':') else: languages = None translation = _gettext.translation( domain, localedir=_get_locale_dir(locale_base), languages=languages, fallback=True) return translation def add_fallback(fallback): """ Add a fallback translations object. Typically used by plugins. :param fallback: gettext.GNUTranslations object """ install() _translations.add_fallback(fallback) def uninstall(): """Disables gettext translations.""" global _translations _translations = None def _get_locale_dir(base): """Returns directory to find .mo translations file in, either local or system :param base: plugins can specify their own local directory """ fs_enc = sys.getfilesystemencoding() if getattr(sys, 'frozen', False): if base is None: base = os.path.dirname(unicode(sys.executable, fs_enc)) return os.path.join(base, u'locale') else: if base is None: base = os.path.dirname(unicode(__file__, fs_enc)) dirpath = os.path.realpath(os.path.join(base, u'locale')) if os.path.exists(dirpath): return dirpath return os.path.join(unicode(sys.prefix, fs_enc), u"share", u"locale") def _check_win32_locale(): for i in ('LANGUAGE','LC_ALL','LC_MESSAGES','LANG'): if os.environ.get(i): break else: lang = None import locale try: import ctypes except ImportError: # use only user's default locale lang = locale.getdefaultlocale()[0] else: # using ctypes to determine all locales lcid_user = ctypes.windll.kernel32.GetUserDefaultLCID() lcid_system = ctypes.windll.kernel32.GetSystemDefaultLCID() if lcid_user != lcid_system: lcid = [lcid_user, lcid_system] else: lcid = [lcid_user] lang = [locale.windows_locale.get(i) for i in lcid] lang = ':'.join([i for i in lang if i]) # set lang code for gettext if lang: os.environ['LANGUAGE'] = lang def _get_current_locale(): if not os.environ.get('LANGUAGE'): from bzrlib import config lang = config.GlobalStack().get('language') if lang: os.environ['LANGUAGE'] = lang return lang if sys.platform == 'win32': _check_win32_locale() for i in ('LANGUAGE','LC_ALL','LC_MESSAGES','LANG'): lang = os.environ.get(i) if lang: return lang return None def load_plugin_translations(domain): """Load the translations for a specific plugin. 
:param domain: Gettext domain name (usually 'bzr-PLUGINNAME') """ locale_base = os.path.dirname( unicode(__file__, sys.getfilesystemencoding())) translation = install_translations(domain=domain, locale_base=locale_base) add_fallback(translation) return translation bzr-2.7.0/bzrlib/identitymap.py0000644000000000000000000000503411673635356014651 0ustar 00000000000000# Copyright (C) 2005 Canonical Ltd # Authors: Robert Collins # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """This module provides an IdentityMap.""" from __future__ import absolute_import from bzrlib import ( errors, ) class IdentityMap(object): """An in memory map from object id to instance. An IdentityMap maps from keys to single instances of objects in memory. We have explicit calls on the map for the root of each inheritance tree that is store in the map. Look for find_CLASS and add_CLASS methods. """ def add_weave(self, id, weave): """Add weave to the map with a given id.""" if self._weave_key(id) in self._map: raise errors.BzrError('weave %s already in the identity map' % id) self._map[self._weave_key(id)] = weave self._reverse_map[weave] = self._weave_key(id) def find_weave(self, id): """Return the weave for 'id', or None if it is not present.""" return self._map.get(self._weave_key(id), None) def __init__(self): super(IdentityMap, self).__init__() self._map = {} self._reverse_map = {} def remove_object(self, an_object): """Remove object from map.""" if isinstance(an_object, list): raise KeyError('%r not in identity map' % an_object) else: self._map.pop(self._reverse_map[an_object]) self._reverse_map.pop(an_object) def _weave_key(self, id): """Return the key for a weaves id.""" return "weave-" + id class NullIdentityMap(object): """A pretend in memory map from object id to instance. A NullIdentityMap is an Identity map that does not store anything in it. """ def add_weave(self, id, weave): """See IdentityMap.add_weave.""" def find_weave(self, id): """See IdentityMap.find_weave.""" return None bzr-2.7.0/bzrlib/ignores.py0000644000000000000000000001576111673646330013772 0ustar 00000000000000# Copyright (C) 2006-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Lists of ignore files, etc.""" from __future__ import absolute_import import errno import os from cStringIO import StringIO import bzrlib from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ from bzrlib import ( atomicfile, config, globbing, trace, ) """) # ~/.bazaar/ignore will be filled out using # this ignore list, if it does not exist # please keep these sorted (in C locale order) to aid merging USER_DEFAULTS = [ '*.a', '*.o', '*.py[co]', '*.so', '*.sw[nop]', '*~', '.#*', '[#]*#', '__pycache__', 'bzr-orphans', ] def parse_ignore_file(f): """Read in all of the lines in the file and turn it into an ignore list Continue in the case of utf8 decoding errors, and emit a warning when such and error is found. Optimise for the common case -- no decoding errors. """ ignored = set() ignore_file = f.read() try: # Try and parse whole ignore file at once. unicode_lines = ignore_file.decode('utf8').split('\n') except UnicodeDecodeError: # Otherwise go though line by line and pick out the 'good' # decodable lines lines = ignore_file.split('\n') unicode_lines = [] for line_number, line in enumerate(lines): try: unicode_lines.append(line.decode('utf-8')) except UnicodeDecodeError: # report error about line (idx+1) trace.warning( '.bzrignore: On Line #%d, malformed utf8 character. ' 'Ignoring line.' % (line_number+1)) # Append each line to ignore list if it's not a comment line for line in unicode_lines: line = line.rstrip('\r\n') if not line or line.startswith('#'): continue ignored.add(globbing.normalize_pattern(line)) return ignored def get_user_ignores(): """Get the list of user ignored files, possibly creating it.""" path = config.user_ignore_config_filename() patterns = set(USER_DEFAULTS) try: f = open(path, 'rb') except (IOError, OSError), e: # open() shouldn't return an IOError without errno, but just in case err = getattr(e, 'errno', None) if err not in (errno.ENOENT,): raise # Create the ignore file, and just return the default # We want to ignore if we can't write to the file # since get_* should be a safe operation try: _set_user_ignores(USER_DEFAULTS) except (IOError, OSError), e: if e.errno not in (errno.EPERM,): raise return patterns try: return parse_ignore_file(f) finally: f.close() def _set_user_ignores(patterns): """Fill out the user ignore file with the given patterns This may raise an error if it doesn't have permission to write to the user ignore file. This is mostly used for testing, since it would be bad form to rewrite a user's ignore list. bzrlib only writes this file if it does not exist. """ ignore_path = config.user_ignore_config_filename() config.ensure_config_dir_exists() # Create an empty file f = open(ignore_path, 'wb') try: for pattern in patterns: f.write(pattern.encode('utf8') + '\n') finally: f.close() def add_unique_user_ignores(new_ignores): """Add entries to the user's ignore list if not present. 
:param new_ignores: A list of ignore patterns :return: The list of ignores that were added """ ignored = get_user_ignores() to_add = [] for ignore in new_ignores: ignore = globbing.normalize_pattern(ignore) if ignore not in ignored: ignored.add(ignore) to_add.append(ignore) if not to_add: return [] f = open(config.user_ignore_config_filename(), 'ab') try: for pattern in to_add: f.write(pattern.encode('utf8') + '\n') finally: f.close() return to_add _runtime_ignores = set() def add_runtime_ignores(ignores): """Add some ignore patterns that only exists in memory. This is used by some plugins that want bzr to ignore files, but don't want to change a users ignore list. (Such as a conversion script that needs to ignore temporary files, but does not want to modify the project's ignore list.) :param ignores: A list or generator of ignore patterns. :return: None """ global _runtime_ignores _runtime_ignores.update(set(ignores)) def get_runtime_ignores(): """Get the current set of runtime ignores.""" return _runtime_ignores def tree_ignores_add_patterns(tree, name_pattern_list): """Add more ignore patterns to the ignore file in a tree. If ignore file does not exist then it will be created. The ignore file will be automatically added under version control. :param tree: Working tree to update the ignore list. :param name_pattern_list: List of ignore patterns. :return: None """ # read in the existing ignores set ifn = tree.abspath(bzrlib.IGNORE_FILENAME) if tree.has_filename(ifn): f = open(ifn, 'rU') try: file_contents = f.read() # figure out what kind of line endings are used newline = getattr(f, 'newlines', None) if type(newline) is tuple: newline = newline[0] elif newline is None: newline = os.linesep finally: f.close() else: file_contents = "" newline = os.linesep sio = StringIO(file_contents) try: ignores = parse_ignore_file(sio) finally: sio.close() # write out the updated ignores set f = atomicfile.AtomicFile(ifn, 'wb') try: # write the original contents, preserving original line endings f.write(newline.join(file_contents.split('\n'))) if len(file_contents) > 0 and not file_contents.endswith('\n'): f.write(newline) for pattern in name_pattern_list: if not pattern in ignores: f.write(pattern.encode('utf-8')) f.write(newline) f.commit() finally: f.close() if not tree.path2id(bzrlib.IGNORE_FILENAME): tree.add([bzrlib.IGNORE_FILENAME]) bzr-2.7.0/bzrlib/index.py0000644000000000000000000023435211673635356013440 0ustar 00000000000000# Copyright (C) 2007-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Indexing facilities.""" from __future__ import absolute_import __all__ = [ 'CombinedGraphIndex', 'GraphIndex', 'GraphIndexBuilder', 'GraphIndexPrefixAdapter', 'InMemoryGraphIndex', ] from bisect import bisect_right from cStringIO import StringIO import re import sys from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ from bzrlib import ( bisect_multi, revision as _mod_revision, trace, ) """) from bzrlib import ( debug, errors, ) from bzrlib.static_tuple import StaticTuple _HEADER_READV = (0, 200) _OPTION_KEY_ELEMENTS = "key_elements=" _OPTION_LEN = "len=" _OPTION_NODE_REFS = "node_ref_lists=" _SIGNATURE = "Bazaar Graph Index 1\n" _whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]') _newline_null_re = re.compile('[\n\0]') def _has_key_from_parent_map(self, key): """Check if this index has one key. If it's possible to check for multiple keys at once through calling get_parent_map that should be faster. """ return (key in self.get_parent_map([key])) def _missing_keys_from_parent_map(self, keys): return set(keys) - set(self.get_parent_map(keys)) class GraphIndexBuilder(object): """A builder that can build a GraphIndex. The resulting graph has the structure:: _SIGNATURE OPTIONS NODES NEWLINE _SIGNATURE := 'Bazaar Graph Index 1' NEWLINE OPTIONS := 'node_ref_lists=' DIGITS NEWLINE NODES := NODE* NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE KEY := Not-whitespace-utf8 ABSENT := 'a' REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1} REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)? REFERENCE := DIGITS ; digits is the byte offset in the index of the ; referenced key. VALUE := no-newline-no-null-bytes """ def __init__(self, reference_lists=0, key_elements=1): """Create a GraphIndex builder. :param reference_lists: The number of node references lists for each entry. :param key_elements: The number of bytestrings in each key. """ self.reference_lists = reference_lists # A dict of {key: (absent, ref_lists, value)} self._nodes = {} # Keys that are referenced but not actually present in this index self._absent_keys = set() self._nodes_by_key = None self._key_length = key_elements self._optimize_for_size = False self._combine_backing_indices = True def _check_key(self, key): """Raise BadIndexKey if key is not a valid key for this index.""" if type(key) not in (tuple, StaticTuple): raise errors.BadIndexKey(key) if self._key_length != len(key): raise errors.BadIndexKey(key) for element in key: if not element or _whitespace_re.search(element) is not None: raise errors.BadIndexKey(element) def _external_references(self): """Return references that are not present in this index. """ keys = set() refs = set() # TODO: JAM 2008-11-21 This makes an assumption about how the reference # lists are used. It is currently correct for pack-0.92 through # 1.9, which use the node references (3rd column) second # reference list as the compression parent. Perhaps this should # be moved into something higher up the stack, since it # makes assumptions about how the index is used. 
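# Illustrative aside (not part of bzrlib): building a tiny index in the format
# described by the GraphIndexBuilder docstring above.  The keys, values and
# reference list used here are made up; add_node() and finish() are the
# builder methods defined later in this file.
from bzrlib.index import GraphIndexBuilder

builder = GraphIndexBuilder(reference_lists=1, key_elements=1)
builder.add_node(('A',), 'value-A', ([],))          # no references
builder.add_node(('B',), 'value-B', ([('A',)],))    # one reference list: B -> A
stream = builder.finish()    # a cStringIO holding the serialised index
# The first line is the signature from the grammar above.
assert stream.getvalue().startswith('Bazaar Graph Index 1\n')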
if self.reference_lists > 1: for node in self.iter_all_entries(): keys.add(node[1]) refs.update(node[3][1]) return refs - keys else: # If reference_lists == 0 there can be no external references, and # if reference_lists == 1, then there isn't a place to store the # compression parent return set() def _get_nodes_by_key(self): if self._nodes_by_key is None: nodes_by_key = {} if self.reference_lists: for key, (absent, references, value) in self._nodes.iteritems(): if absent: continue key_dict = nodes_by_key for subkey in key[:-1]: key_dict = key_dict.setdefault(subkey, {}) key_dict[key[-1]] = key, value, references else: for key, (absent, references, value) in self._nodes.iteritems(): if absent: continue key_dict = nodes_by_key for subkey in key[:-1]: key_dict = key_dict.setdefault(subkey, {}) key_dict[key[-1]] = key, value self._nodes_by_key = nodes_by_key return self._nodes_by_key def _update_nodes_by_key(self, key, value, node_refs): """Update the _nodes_by_key dict with a new key. For a key of (foo, bar, baz) create _nodes_by_key[foo][bar][baz] = key_value """ if self._nodes_by_key is None: return key_dict = self._nodes_by_key if self.reference_lists: key_value = StaticTuple(key, value, node_refs) else: key_value = StaticTuple(key, value) for subkey in key[:-1]: key_dict = key_dict.setdefault(subkey, {}) key_dict[key[-1]] = key_value def _check_key_ref_value(self, key, references, value): """Check that 'key' and 'references' are all valid. :param key: A key tuple. Must conform to the key interface (be a tuple, be of the right length, not have any whitespace or nulls in any key element.) :param references: An iterable of reference lists. Something like [[(ref, key)], [(ref, key), (other, key)]] :param value: The value associate with this key. Must not contain newlines or null characters. :return: (node_refs, absent_references) * node_refs: basically a packed form of 'references' where all iterables are tuples * absent_references: reference keys that are not in self._nodes. This may contain duplicates if the same key is referenced in multiple lists. """ as_st = StaticTuple.from_sequence self._check_key(key) if _newline_null_re.search(value) is not None: raise errors.BadIndexValue(value) if len(references) != self.reference_lists: raise errors.BadIndexValue(references) node_refs = [] absent_references = [] for reference_list in references: for reference in reference_list: # If reference *is* in self._nodes, then we know it has already # been checked. if reference not in self._nodes: self._check_key(reference) absent_references.append(reference) reference_list = as_st([as_st(ref).intern() for ref in reference_list]) node_refs.append(reference_list) return as_st(node_refs), absent_references def add_node(self, key, value, references=()): """Add a node to the index. :param key: The key. keys are non-empty tuples containing as many whitespace-free utf8 bytestrings as the key length defined for this index. :param references: An iterable of iterables of keys. Each is a reference to another key. :param value: The value to associate with the key. It may be any bytes as long as it does not contain \\0 or \\n. 
""" (node_refs, absent_references) = self._check_key_ref_value(key, references, value) if key in self._nodes and self._nodes[key][0] != 'a': raise errors.BadIndexDuplicateKey(key, self) for reference in absent_references: # There may be duplicates, but I don't think it is worth worrying # about self._nodes[reference] = ('a', (), '') self._absent_keys.update(absent_references) self._absent_keys.discard(key) self._nodes[key] = ('', node_refs, value) if self._nodes_by_key is not None and self._key_length > 1: self._update_nodes_by_key(key, value, node_refs) def clear_cache(self): """See GraphIndex.clear_cache() This is a no-op, but we need the api to conform to a generic 'Index' abstraction. """ def finish(self): """Finish the index. :returns: cStringIO holding the full context of the index as it should be written to disk. """ lines = [_SIGNATURE] lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n') lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n') key_count = len(self._nodes) - len(self._absent_keys) lines.append(_OPTION_LEN + str(key_count) + '\n') prefix_length = sum(len(x) for x in lines) # references are byte offsets. To avoid having to do nasty # polynomial work to resolve offsets (references to later in the # file cannot be determined until all the inbetween references have # been calculated too) we pad the offsets with 0's to make them be # of consistent length. Using binary offsets would break the trivial # file parsing. # to calculate the width of zero's needed we do three passes: # one to gather all the non-reference data and the number of references. # one to pad all the data with reference-length and determine entry # addresses. # One to serialise. # forward sorted by key. In future we may consider topological sorting, # at the cost of table scans for direct lookup, or a second index for # direct lookup nodes = sorted(self._nodes.items()) # if we do not prepass, we don't know how long it will be up front. expected_bytes = None # we only need to pre-pass if we have reference lists at all. if self.reference_lists: key_offset_info = [] non_ref_bytes = prefix_length total_references = 0 # TODO use simple multiplication for the constants in this loop. for key, (absent, references, value) in nodes: # record the offset known *so far* for this key: # the non reference bytes to date, and the total references to # date - saves reaccumulating on the second pass key_offset_info.append((key, non_ref_bytes, total_references)) # key is literal, value is literal, there are 3 null's, 1 NL # key is variable length tuple, \x00 between elements non_ref_bytes += sum(len(element) for element in key) if self._key_length > 1: non_ref_bytes += self._key_length - 1 # value is literal bytes, there are 3 null's, 1 NL. non_ref_bytes += len(value) + 3 + 1 # one byte for absent if set. if absent: non_ref_bytes += 1 elif self.reference_lists: # (ref_lists -1) tabs non_ref_bytes += self.reference_lists - 1 # (ref-1 cr's per ref_list) for ref_list in references: # how many references across the whole file? total_references += len(ref_list) # accrue reference separators if ref_list: non_ref_bytes += len(ref_list) - 1 # how many digits are needed to represent the total byte count? digits = 1 possible_total_bytes = non_ref_bytes + total_references*digits while 10 ** digits < possible_total_bytes: digits += 1 possible_total_bytes = non_ref_bytes + total_references*digits expected_bytes = possible_total_bytes + 1 # terminating newline # resolve key addresses. 
key_addresses = {} for key, non_ref_bytes, total_references in key_offset_info: key_addresses[key] = non_ref_bytes + total_references*digits # serialise format_string = '%%0%sd' % digits for key, (absent, references, value) in nodes: flattened_references = [] for ref_list in references: ref_addresses = [] for reference in ref_list: ref_addresses.append(format_string % key_addresses[reference]) flattened_references.append('\r'.join(ref_addresses)) string_key = '\x00'.join(key) lines.append("%s\x00%s\x00%s\x00%s\n" % (string_key, absent, '\t'.join(flattened_references), value)) lines.append('\n') result = StringIO(''.join(lines)) if expected_bytes and len(result.getvalue()) != expected_bytes: raise errors.BzrError('Failed index creation. Internal error:' ' mismatched output length and expected length: %d %d' % (len(result.getvalue()), expected_bytes)) return result def set_optimize(self, for_size=None, combine_backing_indices=None): """Change how the builder tries to optimize the result. :param for_size: Tell the builder to try and make the index as small as possible. :param combine_backing_indices: If the builder spills to disk to save memory, should the on-disk indices be combined. Set to True if you are going to be probing the index, but to False if you are not. (If you are not querying, then the time spent combining is wasted.) :return: None """ # GraphIndexBuilder itself doesn't pay attention to the flag yet, but # other builders do. if for_size is not None: self._optimize_for_size = for_size if combine_backing_indices is not None: self._combine_backing_indices = combine_backing_indices def find_ancestry(self, keys, ref_list_num): """See CombinedGraphIndex.find_ancestry()""" pending = set(keys) parent_map = {} missing_keys = set() while pending: next_pending = set() for _, key, value, ref_lists in self.iter_entries(pending): parent_keys = ref_lists[ref_list_num] parent_map[key] = parent_keys next_pending.update([p for p in parent_keys if p not in parent_map]) missing_keys.update(pending.difference(parent_map)) pending = next_pending return parent_map, missing_keys class GraphIndex(object): """An index for data with embedded graphs. The index maps keys to a list of key reference lists, and a value. Each node has the same number of key reference lists. Each key reference list can be empty or an arbitrary length. The value is an opaque NULL terminated string without any newlines. The storage of the index is hidden in the interface: keys and key references are always tuples of bytestrings, never the internal representation (e.g. dictionary offsets). It is presumed that the index will not be mutated - it is static data. Successive iter_all_entries calls will read the entire index each time. Additionally, iter_entries calls will read the index linearly until the desired keys are found. XXX: This must be fixed before the index is suitable for production use. :XXX """ def __init__(self, transport, name, size, unlimited_cache=False, offset=0): """Open an index called name on transport. :param transport: A bzrlib.transport.Transport. :param name: A path to provide to transport API calls. :param size: The size of the index in bytes. This is used for bisection logic to perform partial index reads. While the size could be obtained by statting the file this introduced an additional round trip as well as requiring stat'able transports, both of which are avoided by having it supplied. If size is None, then bisection support will be disabled and accessing the index will just stream all the data. 
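# --- Illustrative sketch (not part of bzrlib's test suite): round-tripping a
# built index through a transport and reopening it with the size hint the
# __init__ docstring above describes. get_transport/put_bytes are the usual
# bzrlib transport helpers; 'example.cix' is just a made-up file name.
def _example_round_trip(builder):
    from bzrlib.transport import get_transport
    trans = get_transport('.')
    data = builder.finish().getvalue()
    trans.put_bytes('example.cix', data)
    # Supplying the exact size enables the bisection code paths; passing
    # size=None would force GraphIndex to stream and buffer everything.
    return GraphIndex(trans, 'example.cix', len(data))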
:param offset: Instead of starting the index data at offset 0, start it at an arbitrary offset. """ self._transport = transport self._name = name # Becomes a dict of key:(value, reference-list-byte-locations) used by # the bisection interface to store parsed but not resolved keys. self._bisect_nodes = None # Becomes a dict of key:(value, reference-list-keys) which are ready to # be returned directly to callers. self._nodes = None # a sorted list of slice-addresses for the parsed bytes of the file. # e.g. (0,1) would mean that byte 0 is parsed. self._parsed_byte_map = [] # a sorted list of keys matching each slice address for parsed bytes # e.g. (None, 'foo@bar') would mean that the first byte contained no # key, and the end byte of the slice is the of the data for 'foo@bar' self._parsed_key_map = [] self._key_count = None self._keys_by_offset = None self._nodes_by_key = None self._size = size # The number of bytes we've read so far in trying to process this file self._bytes_read = 0 self._base_offset = offset def __eq__(self, other): """Equal when self and other were created with the same parameters.""" return ( type(self) == type(other) and self._transport == other._transport and self._name == other._name and self._size == other._size) def __ne__(self, other): return not self.__eq__(other) def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self._transport.abspath(self._name)) def _buffer_all(self, stream=None): """Buffer all the index data. Mutates self._nodes and self.keys_by_offset. """ if self._nodes is not None: # We already did this return if 'index' in debug.debug_flags: trace.mutter('Reading entire index %s', self._transport.abspath(self._name)) if stream is None: stream = self._transport.get(self._name) if self._base_offset != 0: # This is wasteful, but it is better than dealing with # adjusting all the offsets, etc. stream = StringIO(stream.read()[self._base_offset:]) self._read_prefix(stream) self._expected_elements = 3 + self._key_length line_count = 0 # raw data keyed by offset self._keys_by_offset = {} # ready-to-return key:value or key:value, node_ref_lists self._nodes = {} self._nodes_by_key = None trailers = 0 pos = stream.tell() lines = stream.read().split('\n') # GZ 2009-09-20: Should really use a try/finally block to ensure close stream.close() del lines[-1] _, _, _, trailers = self._parse_lines(lines, pos) for key, absent, references, value in self._keys_by_offset.itervalues(): if absent: continue # resolve references: if self.node_ref_lists: node_value = (value, self._resolve_references(references)) else: node_value = value self._nodes[key] = node_value # cache the keys for quick set intersections if trailers != 1: # there must be one line - the empty trailer line. raise errors.BadIndexData(self) def clear_cache(self): """Clear out any cached/memoized values. This can be called at any time, but generally it is used when we have extracted some information, but don't expect to be requesting any more from this index. """ def external_references(self, ref_list_num): """Return references that are not present in this index. 
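# --- Illustrative sketch (not part of bzrlib): what external_references()
# computes, shown on a toy in-memory node map of the same shape the buffered
# index uses (key -> (value, ref_lists)). The keys here are invented.
def _example_external_references(ref_list_num=0):
    nodes = {
        ('rev-b',): ('value-b', ((('rev-a',),),)),  # parent present below
        ('rev-a',): ('value-a', ((('rev-0',),),)),  # parent not in the index
    }
    refs = set()
    for key, (value, ref_lists) in nodes.iteritems():
        ref_list = ref_lists[ref_list_num]
        refs.update([ref for ref in ref_list if ref not in nodes])
    return refs  # -> set([('rev-0',)])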
""" self._buffer_all() if ref_list_num + 1 > self.node_ref_lists: raise ValueError('No ref list %d, index has %d ref lists' % (ref_list_num, self.node_ref_lists)) refs = set() nodes = self._nodes for key, (value, ref_lists) in nodes.iteritems(): ref_list = ref_lists[ref_list_num] refs.update([ref for ref in ref_list if ref not in nodes]) return refs def _get_nodes_by_key(self): if self._nodes_by_key is None: nodes_by_key = {} if self.node_ref_lists: for key, (value, references) in self._nodes.iteritems(): key_dict = nodes_by_key for subkey in key[:-1]: key_dict = key_dict.setdefault(subkey, {}) key_dict[key[-1]] = key, value, references else: for key, value in self._nodes.iteritems(): key_dict = nodes_by_key for subkey in key[:-1]: key_dict = key_dict.setdefault(subkey, {}) key_dict[key[-1]] = key, value self._nodes_by_key = nodes_by_key return self._nodes_by_key def iter_all_entries(self): """Iterate over all keys within the index. :return: An iterable of (index, key, value) or (index, key, value, reference_lists). The former tuple is used when there are no reference lists in the index, making the API compatible with simple key:value index types. There is no defined order for the result iteration - it will be in the most efficient order for the index. """ if 'evil' in debug.debug_flags: trace.mutter_callsite(3, "iter_all_entries scales with size of history.") if self._nodes is None: self._buffer_all() if self.node_ref_lists: for key, (value, node_ref_lists) in self._nodes.iteritems(): yield self, key, value, node_ref_lists else: for key, value in self._nodes.iteritems(): yield self, key, value def _read_prefix(self, stream): signature = stream.read(len(self._signature())) if not signature == self._signature(): raise errors.BadIndexFormatSignature(self._name, GraphIndex) options_line = stream.readline() if not options_line.startswith(_OPTION_NODE_REFS): raise errors.BadIndexOptions(self) try: self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):-1]) except ValueError: raise errors.BadIndexOptions(self) options_line = stream.readline() if not options_line.startswith(_OPTION_KEY_ELEMENTS): raise errors.BadIndexOptions(self) try: self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):-1]) except ValueError: raise errors.BadIndexOptions(self) options_line = stream.readline() if not options_line.startswith(_OPTION_LEN): raise errors.BadIndexOptions(self) try: self._key_count = int(options_line[len(_OPTION_LEN):-1]) except ValueError: raise errors.BadIndexOptions(self) def _resolve_references(self, references): """Return the resolved key references for references. References are resolved by looking up the location of the key in the _keys_by_offset map and substituting the key name, preserving ordering. :param references: An iterable of iterables of key locations. e.g. [[123, 456], [123]] :return: A tuple of tuples of keys. """ node_refs = [] for ref_list in references: node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list])) return tuple(node_refs) def _find_index(self, range_map, key): """Helper for the _parsed_*_index calls. Given a range map - [(start, end), ...], finds the index of the range in the map for key if it is in the map, and if it is not there, the immediately preceeding range in the map. 
""" result = bisect_right(range_map, key) - 1 if result + 1 < len(range_map): # check the border condition, it may be in result + 1 if range_map[result + 1][0] == key[0]: return result + 1 return result def _parsed_byte_index(self, offset): """Return the index of the entry immediately before offset. e.g. if the parsed map has regions 0,10 and 11,12 parsed, meaning that there is one unparsed byte (the 11th, addressed as[10]). then: asking for 0 will return 0 asking for 10 will return 0 asking for 11 will return 1 asking for 12 will return 1 """ key = (offset, 0) return self._find_index(self._parsed_byte_map, key) def _parsed_key_index(self, key): """Return the index of the entry immediately before key. e.g. if the parsed map has regions (None, 'a') and ('b','c') parsed, meaning that keys from None to 'a' inclusive, and 'b' to 'c' inclusive have been parsed, then: asking for '' will return 0 asking for 'a' will return 0 asking for 'b' will return 1 asking for 'e' will return 1 """ search_key = (key, None) return self._find_index(self._parsed_key_map, search_key) def _is_parsed(self, offset): """Returns True if offset has been parsed.""" index = self._parsed_byte_index(offset) if index == len(self._parsed_byte_map): return offset < self._parsed_byte_map[index - 1][1] start, end = self._parsed_byte_map[index] return offset >= start and offset < end def _iter_entries_from_total_buffer(self, keys): """Iterate over keys when the entire index is parsed.""" # Note: See the note in BTreeBuilder.iter_entries for why we don't use # .intersection() here nodes = self._nodes keys = [key for key in keys if key in nodes] if self.node_ref_lists: for key in keys: value, node_refs = nodes[key] yield self, key, value, node_refs else: for key in keys: yield self, key, nodes[key] def iter_entries(self, keys): """Iterate over keys within the index. :param keys: An iterable providing the keys to be retrieved. :return: An iterable as per iter_all_entries, but restricted to the keys supplied. No additional keys will be returned, and every key supplied that is in the index will be returned. """ keys = set(keys) if not keys: return [] if self._size is None and self._nodes is None: self._buffer_all() # We fit about 20 keys per minimum-read (4K), so if we are looking for # more than 1/20th of the index its likely (assuming homogenous key # spread) that we'll read the entire index. If we're going to do that, # buffer the whole thing. A better analysis might take key spread into # account - but B+Tree indices are better anyway. # We could look at all data read, and use a threshold there, which will # trigger on ancestry walks, but that is not yet fully mapped out. if self._nodes is None and len(keys) * 20 > self.key_count(): self._buffer_all() if self._nodes is not None: return self._iter_entries_from_total_buffer(keys) else: return (result[1] for result in bisect_multi.bisect_multi_bytes( self._lookup_keys_via_location, self._size, keys)) def iter_entries_prefix(self, keys): """Iterate over keys within the index using prefix matching. Prefix matching is applied within the tuple of a key, not to within the bytestring of each key element. e.g. if you have the keys ('foo', 'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then only the former key is returned. WARNING: Note that this method currently causes a full index parse unconditionally (which is reasonably appropriate as it is a means for thunking many small indices into one larger one and still supplies iter_all_entries at the thunk layer). 
:param keys: An iterable providing the key prefixes to be retrieved. Each key prefix takes the form of a tuple the length of a key, but with the last N elements 'None' rather than a regular bytestring. The first element cannot be 'None'. :return: An iterable as per iter_all_entries, but restricted to the keys with a matching prefix to those supplied. No additional keys will be returned, and every match that is in the index will be returned. """ keys = set(keys) if not keys: return # load data - also finds key lengths if self._nodes is None: self._buffer_all() if self._key_length == 1: for key in keys: # sanity check if key[0] is None: raise errors.BadIndexKey(key) if len(key) != self._key_length: raise errors.BadIndexKey(key) if self.node_ref_lists: value, node_refs = self._nodes[key] yield self, key, value, node_refs else: yield self, key, self._nodes[key] return nodes_by_key = self._get_nodes_by_key() for key in keys: # sanity check if key[0] is None: raise errors.BadIndexKey(key) if len(key) != self._key_length: raise errors.BadIndexKey(key) # find what it refers to: key_dict = nodes_by_key elements = list(key) # find the subdict whose contents should be returned. try: while len(elements) and elements[0] is not None: key_dict = key_dict[elements[0]] elements.pop(0) except KeyError: # a non-existant lookup. continue if len(elements): dicts = [key_dict] while dicts: key_dict = dicts.pop(-1) # can't be empty or would not exist item, value = key_dict.iteritems().next() if type(value) == dict: # push keys dicts.extend(key_dict.itervalues()) else: # yield keys for value in key_dict.itervalues(): # each value is the key:value:node refs tuple # ready to yield. yield (self, ) + value else: # the last thing looked up was a terminal element yield (self, ) + key_dict def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys): """See BTreeIndex._find_ancestors.""" # The api can be implemented as a trivial overlay on top of # iter_entries, it is not an efficient implementation, but it at least # gets the job done. found_keys = set() search_keys = set() for index, key, value, refs in self.iter_entries(keys): parent_keys = refs[ref_list_num] found_keys.add(key) parent_map[key] = parent_keys search_keys.update(parent_keys) # Figure out what, if anything, was missing missing_keys.update(set(keys).difference(found_keys)) search_keys = search_keys.difference(parent_map) return search_keys def key_count(self): """Return an estimate of the number of keys in this index. For GraphIndex the estimate is exact. """ if self._key_count is None: self._read_and_parse([_HEADER_READV]) return self._key_count def _lookup_keys_via_location(self, location_keys): """Public interface for implementing bisection. If _buffer_all has been called, then all the data for the index is in memory, and this method should not be called, as it uses a separate cache because it cannot pre-resolve all indices, which buffer_all does for performance. :param location_keys: A list of location(byte offset), key tuples. :return: A list of (location_key, result) tuples as expected by bzrlib.bisect_multi.bisect_multi_bytes. """ # Possible improvements: # - only bisect lookup each key once # - sort the keys first, and use that to reduce the bisection window # ----- # this progresses in three parts: # read data # parse it # attempt to answer the question from the now in memory data. # build the readv request # for each location, ask for 800 bytes - much more than rows we've seen # anywhere. 
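# --- Illustrative sketch (not part of bzrlib): building the readv ranges the
# comment above describes. Each probe asks for an 800 byte window starting at
# the bisection location, clamped so it never runs past the end of the index.
# The locations and size below are made up.
def _example_readv_ranges(locations, index_size):
    readv_ranges = []
    for location in locations:
        length = 800
        if location + length > index_size:
            length = index_size - location
        if length > 0:
            readv_ranges.append((location, length))
    return readv_ranges

# e.g. _example_readv_ranges([0, 2000, 2300], 2500)
#      -> [(0, 800), (2000, 500), (2300, 200)]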
readv_ranges = [] for location, key in location_keys: # can we answer from cache? if self._bisect_nodes and key in self._bisect_nodes: # We have the key parsed. continue index = self._parsed_key_index(key) if (len(self._parsed_key_map) and self._parsed_key_map[index][0] <= key and (self._parsed_key_map[index][1] >= key or # end of the file has been parsed self._parsed_byte_map[index][1] == self._size)): # the key has been parsed, so no lookup is needed even if its # not present. continue # - if we have examined this part of the file already - yes index = self._parsed_byte_index(location) if (len(self._parsed_byte_map) and self._parsed_byte_map[index][0] <= location and self._parsed_byte_map[index][1] > location): # the byte region has been parsed, so no read is needed. continue length = 800 if location + length > self._size: length = self._size - location # todo, trim out parsed locations. if length > 0: readv_ranges.append((location, length)) # read the header if needed if self._bisect_nodes is None: readv_ranges.append(_HEADER_READV) self._read_and_parse(readv_ranges) result = [] if self._nodes is not None: # _read_and_parse triggered a _buffer_all because we requested the # whole data range for location, key in location_keys: if key not in self._nodes: # not present result.append(((location, key), False)) elif self.node_ref_lists: value, refs = self._nodes[key] result.append(((location, key), (self, key, value, refs))) else: result.append(((location, key), (self, key, self._nodes[key]))) return result # generate results: # - figure out <, >, missing, present # - result present references so we can return them. # keys that we cannot answer until we resolve references pending_references = [] pending_locations = set() for location, key in location_keys: # can we answer from cache? if key in self._bisect_nodes: # the key has been parsed, so no lookup is needed if self.node_ref_lists: # the references may not have been all parsed. value, refs = self._bisect_nodes[key] wanted_locations = [] for ref_list in refs: for ref in ref_list: if ref not in self._keys_by_offset: wanted_locations.append(ref) if wanted_locations: pending_locations.update(wanted_locations) pending_references.append((location, key)) continue result.append(((location, key), (self, key, value, self._resolve_references(refs)))) else: result.append(((location, key), (self, key, self._bisect_nodes[key]))) continue else: # has the region the key should be in, been parsed? index = self._parsed_key_index(key) if (self._parsed_key_map[index][0] <= key and (self._parsed_key_map[index][1] >= key or # end of the file has been parsed self._parsed_byte_map[index][1] == self._size)): result.append(((location, key), False)) continue # no, is the key above or below the probed location: # get the range of the probed & parsed location index = self._parsed_byte_index(location) # if the key is below the start of the range, its below if key < self._parsed_key_map[index][0]: direction = -1 else: direction = +1 result.append(((location, key), direction)) readv_ranges = [] # lookup data to resolve references for location in pending_locations: length = 800 if location + length > self._size: length = self._size - location # TODO: trim out parsed locations (e.g. 
if the 800 is into the # parsed region trim it, and dont use the adjust_for_latency # facility) if length > 0: readv_ranges.append((location, length)) self._read_and_parse(readv_ranges) if self._nodes is not None: # The _read_and_parse triggered a _buffer_all, grab the data and # return it for location, key in pending_references: value, refs = self._nodes[key] result.append(((location, key), (self, key, value, refs))) return result for location, key in pending_references: # answer key references we had to look-up-late. value, refs = self._bisect_nodes[key] result.append(((location, key), (self, key, value, self._resolve_references(refs)))) return result def _parse_header_from_bytes(self, bytes): """Parse the header from a region of bytes. :param bytes: The data to parse. :return: An offset, data tuple such as readv yields, for the unparsed data. (which may length 0). """ signature = bytes[0:len(self._signature())] if not signature == self._signature(): raise errors.BadIndexFormatSignature(self._name, GraphIndex) lines = bytes[len(self._signature()):].splitlines() options_line = lines[0] if not options_line.startswith(_OPTION_NODE_REFS): raise errors.BadIndexOptions(self) try: self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):]) except ValueError: raise errors.BadIndexOptions(self) options_line = lines[1] if not options_line.startswith(_OPTION_KEY_ELEMENTS): raise errors.BadIndexOptions(self) try: self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):]) except ValueError: raise errors.BadIndexOptions(self) options_line = lines[2] if not options_line.startswith(_OPTION_LEN): raise errors.BadIndexOptions(self) try: self._key_count = int(options_line[len(_OPTION_LEN):]) except ValueError: raise errors.BadIndexOptions(self) # calculate the bytes we have processed header_end = (len(signature) + len(lines[0]) + len(lines[1]) + len(lines[2]) + 3) self._parsed_bytes(0, None, header_end, None) # setup parsing state self._expected_elements = 3 + self._key_length # raw data keyed by offset self._keys_by_offset = {} # keys with the value and node references self._bisect_nodes = {} return header_end, bytes[header_end:] def _parse_region(self, offset, data): """Parse node data returned from a readv operation. :param offset: The byte offset the data starts at. :param data: The data to parse. """ # trim the data. # end first: end = offset + len(data) high_parsed = offset while True: # Trivial test - if the current index's end is within the # low-matching parsed range, we're done. index = self._parsed_byte_index(high_parsed) if end < self._parsed_byte_map[index][1]: return # print "[%d:%d]" % (offset, end), \ # self._parsed_byte_map[index:index + 2] high_parsed, last_segment = self._parse_segment( offset, data, end, index) if last_segment: return def _parse_segment(self, offset, data, end, index): """Parse one segment of data. :param offset: Where 'data' begins in the file. :param data: Some data to parse a segment of. :param end: Where data ends :param index: The current index into the parsed bytes map. :return: True if the parsed segment is the last possible one in the range of data. :return: high_parsed_byte, last_segment. high_parsed_byte is the location of the highest parsed byte in this segment, last_segment is True if the parsed segment is the last possible one in the data block. """ # default is to use all data trim_end = None # accomodate overlap with data before this. 
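# --- Illustrative sketch (not part of bzrlib): the header layout that
# _parse_header_from_bytes() above walks through. The signature and option
# prefixes are module constants defined elsewhere in this file; the literal
# strings used here are assumptions for the example only.
def _example_parse_header(header_bytes):
    signature = 'Bazaar Graph Index 1\n'  # assumed value of _SIGNATURE
    assert header_bytes.startswith(signature)
    lines = header_bytes[len(signature):].splitlines()
    node_ref_lists = int(lines[0][len('node_ref_lists='):])
    key_elements = int(lines[1][len('key_elements='):])
    key_count = int(lines[2][len('len='):])
    # header_end: signature plus the three option lines and their newlines
    header_end = (len(signature) + len(lines[0]) + len(lines[1])
                  + len(lines[2]) + 3)
    return node_ref_lists, key_elements, key_count, header_end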
if offset < self._parsed_byte_map[index][1]: # overlaps the lower parsed region # skip the parsed data trim_start = self._parsed_byte_map[index][1] - offset # don't trim the start for \n start_adjacent = True elif offset == self._parsed_byte_map[index][1]: # abuts the lower parsed region # use all data trim_start = None # do not trim anything start_adjacent = True else: # does not overlap the lower parsed region # use all data trim_start = None # but trim the leading \n start_adjacent = False if end == self._size: # lines up to the end of all data: # use it all trim_end = None # do not strip to the last \n end_adjacent = True last_segment = True elif index + 1 == len(self._parsed_byte_map): # at the end of the parsed data # use it all trim_end = None # but strip to the last \n end_adjacent = False last_segment = True elif end == self._parsed_byte_map[index + 1][0]: # buts up against the next parsed region # use it all trim_end = None # do not strip to the last \n end_adjacent = True last_segment = True elif end > self._parsed_byte_map[index + 1][0]: # overlaps into the next parsed region # only consider the unparsed data trim_end = self._parsed_byte_map[index + 1][0] - offset # do not strip to the last \n as we know its an entire record end_adjacent = True last_segment = end < self._parsed_byte_map[index + 1][1] else: # does not overlap into the next region # use it all trim_end = None # but strip to the last \n end_adjacent = False last_segment = True # now find bytes to discard if needed if not start_adjacent: # work around python bug in rfind if trim_start is None: trim_start = data.find('\n') + 1 else: trim_start = data.find('\n', trim_start) + 1 if not (trim_start != 0): raise AssertionError('no \n was present') # print 'removing start', offset, trim_start, repr(data[:trim_start]) if not end_adjacent: # work around python bug in rfind if trim_end is None: trim_end = data.rfind('\n') + 1 else: trim_end = data.rfind('\n', None, trim_end) + 1 if not (trim_end != 0): raise AssertionError('no \n was present') # print 'removing end', offset, trim_end, repr(data[trim_end:]) # adjust offset and data to the parseable data. trimmed_data = data[trim_start:trim_end] if not (trimmed_data): raise AssertionError('read unneeded data [%d:%d] from [%d:%d]' % (trim_start, trim_end, offset, offset + len(data))) if trim_start: offset += trim_start # print "parsing", repr(trimmed_data) # splitlines mangles the \r delimiters.. don't use it. lines = trimmed_data.split('\n') del lines[-1] pos = offset first_key, last_key, nodes, _ = self._parse_lines(lines, pos) for key, value in nodes: self._bisect_nodes[key] = value self._parsed_bytes(offset, first_key, offset + len(trimmed_data), last_key) return offset + len(trimmed_data), last_segment def _parse_lines(self, lines, pos): key = None first_key = None trailers = 0 nodes = [] for line in lines: if line == '': # must be at the end if self._size: if not (self._size == pos + 1): raise AssertionError("%s %s" % (self._size, pos)) trailers += 1 continue elements = line.split('\0') if len(elements) != self._expected_elements: raise errors.BadIndexData(self) # keys are tuples. Each element is a string that may occur many # times, so we intern them to save space. 
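# --- Illustrative sketch (not part of bzrlib): the newline trimming that
# _parse_segment() above applies to a readv chunk whose edges fall mid-record.
# A chunk that does not abut already-parsed data drops the partial leading
# record (everything up to the first '\n') and the partial trailing record
# (everything after the last '\n'). The data below is invented.
def _example_trim_to_records(data, start_adjacent, end_adjacent):
    trim_start = 0
    trim_end = len(data)
    if not start_adjacent:
        trim_start = data.find('\n') + 1
    if not end_adjacent:
        trim_end = data.rfind('\n') + 1
    return data[trim_start:trim_end]

# e.g. _example_trim_to_records('tial\nrecord one\nrecord two\npar', False, False)
#      keeps only the complete records 'record one\nrecord two\n'.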
AB, RC, 200807 key = tuple([intern(element) for element in elements[:self._key_length]]) if first_key is None: first_key = key absent, references, value = elements[-3:] ref_lists = [] for ref_string in references.split('\t'): ref_lists.append(tuple([ int(ref) for ref in ref_string.split('\r') if ref ])) ref_lists = tuple(ref_lists) self._keys_by_offset[pos] = (key, absent, ref_lists, value) pos += len(line) + 1 # +1 for the \n if absent: continue if self.node_ref_lists: node_value = (value, ref_lists) else: node_value = value nodes.append((key, node_value)) # print "parsed ", key return first_key, key, nodes, trailers def _parsed_bytes(self, start, start_key, end, end_key): """Mark the bytes from start to end as parsed. Calling self._parsed_bytes(1,2) will mark one byte (the one at offset 1) as parsed. :param start: The start of the parsed region. :param end: The end of the parsed region. """ index = self._parsed_byte_index(start) new_value = (start, end) new_key = (start_key, end_key) if index == -1: # first range parsed is always the beginning. self._parsed_byte_map.insert(index, new_value) self._parsed_key_map.insert(index, new_key) return # four cases: # new region # extend lower region # extend higher region # combine two regions if (index + 1 < len(self._parsed_byte_map) and self._parsed_byte_map[index][1] == start and self._parsed_byte_map[index + 1][0] == end): # combine two regions self._parsed_byte_map[index] = (self._parsed_byte_map[index][0], self._parsed_byte_map[index + 1][1]) self._parsed_key_map[index] = (self._parsed_key_map[index][0], self._parsed_key_map[index + 1][1]) del self._parsed_byte_map[index + 1] del self._parsed_key_map[index + 1] elif self._parsed_byte_map[index][1] == start: # extend the lower entry self._parsed_byte_map[index] = ( self._parsed_byte_map[index][0], end) self._parsed_key_map[index] = ( self._parsed_key_map[index][0], end_key) elif (index + 1 < len(self._parsed_byte_map) and self._parsed_byte_map[index + 1][0] == end): # extend the higher entry self._parsed_byte_map[index + 1] = ( start, self._parsed_byte_map[index + 1][1]) self._parsed_key_map[index + 1] = ( start_key, self._parsed_key_map[index + 1][1]) else: # new entry self._parsed_byte_map.insert(index + 1, new_value) self._parsed_key_map.insert(index + 1, new_key) def _read_and_parse(self, readv_ranges): """Read the ranges and parse the resulting data. :param readv_ranges: A prepared readv range list. """ if not readv_ranges: return if self._nodes is None and self._bytes_read * 2 >= self._size: # We've already read more than 50% of the file and we are about to # request more data, just _buffer_all() and be done self._buffer_all() return base_offset = self._base_offset if base_offset != 0: # Rewrite the ranges for the offset readv_ranges = [(start+base_offset, size) for start, size in readv_ranges] readv_data = self._transport.readv(self._name, readv_ranges, True, self._size + self._base_offset) # parse for offset, data in readv_data: offset -= base_offset self._bytes_read += len(data) if offset < 0: # transport.readv() expanded to extra data which isn't part of # this index data = data[-offset:] offset = 0 if offset == 0 and len(data) == self._size: # We read the whole range, most likely because the # Transport upcast our readv ranges into one long request # for enough total data to grab the whole index. 
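# --- Illustrative sketch (not part of bzrlib): how the parsed-region
# bookkeeping in _parsed_bytes() above evolves. Adjacent regions are merged,
# otherwise a region is extended or a new one inserted, keeping the byte map
# (and the matching key map, omitted here) sorted and minimal. Offsets are
# invented.
def _example_region_merge():
    parsed = [(0, 100), (200, 300)]
    # Parsing bytes 100..200 fills the gap exactly, so the two regions above
    # collapse into a single (0, 300) entry -- the "combine two regions" case.
    start, end = 100, 200
    index = 0                      # region immediately before 'start'
    if parsed[index][1] == start and parsed[index + 1][0] == end:
        parsed[index] = (parsed[index][0], parsed[index + 1][1])
        del parsed[index + 1]
    return parsed                  # -> [(0, 300)]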
self._buffer_all(StringIO(data)) return if self._bisect_nodes is None: # this must be the start if not (offset == 0): raise AssertionError() offset, data = self._parse_header_from_bytes(data) # print readv_ranges, "[%d:%d]" % (offset, offset + len(data)) self._parse_region(offset, data) def _signature(self): """The file signature for this index type.""" return _SIGNATURE def validate(self): """Validate that everything in the index can be accessed.""" # iter_all validates completely at the moment, so just do that. for node in self.iter_all_entries(): pass class CombinedGraphIndex(object): """A GraphIndex made up from smaller GraphIndices. The backing indices must implement GraphIndex, and are presumed to be static data. Queries against the combined index will be made against the first index, and then the second and so on. The order of indices can thus influence performance significantly. For example, if one index is on local disk and a second on a remote server, the local disk index should be before the other in the index list. Also, queries tend to need results from the same indices as previous queries. So the indices will be reordered after every query to put the indices that had the result(s) of that query first (while otherwise preserving the relative ordering). """ def __init__(self, indices, reload_func=None): """Create a CombinedGraphIndex backed by indices. :param indices: An ordered list of indices to query for data. :param reload_func: A function to call if we find we are missing an index. Should have the form reload_func() => True/False to indicate if reloading actually changed anything. """ self._indices = indices self._reload_func = reload_func # Sibling indices are other CombinedGraphIndex that we should call # _move_to_front_by_name on when we auto-reorder ourself. self._sibling_indices = [] # A list of names that corresponds to the instances in self._indices, # so _index_names[0] is always the name for _indices[0], etc. Sibling # indices must all use the same set of names as each other. self._index_names = [None] * len(self._indices) def __repr__(self): return "%s(%s)" % ( self.__class__.__name__, ', '.join(map(repr, self._indices))) def clear_cache(self): """See GraphIndex.clear_cache()""" for index in self._indices: index.clear_cache() def get_parent_map(self, keys): """See graph.StackedParentsProvider.get_parent_map""" search_keys = set(keys) if _mod_revision.NULL_REVISION in search_keys: search_keys.discard(_mod_revision.NULL_REVISION) found_parents = {_mod_revision.NULL_REVISION:[]} else: found_parents = {} for index, key, value, refs in self.iter_entries(search_keys): parents = refs[0] if not parents: parents = (_mod_revision.NULL_REVISION,) found_parents[key] = parents return found_parents has_key = _has_key_from_parent_map def insert_index(self, pos, index, name=None): """Insert a new index in the list of indices to query. :param pos: The position to insert the index. :param index: The index to insert. :param name: a name for this index, e.g. a pack name. These names can be used to reflect index reorderings to related CombinedGraphIndex instances that use the same names. (see set_sibling_indices) """ self._indices.insert(pos, index) self._index_names.insert(pos, name) def iter_all_entries(self): """Iterate over all keys within the index Duplicate keys across child indices are presumed to have the same value and are only reported once. :return: An iterable of (index, key, reference_lists, value). 
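# --- Illustrative sketch (not part of bzrlib): the duplicate suppression
# described above. Child indices are queried in order and a key seen in an
# earlier index shadows the same key in later ones; the toy entry lists below
# stand in for the (index, key, value, refs) nodes the real iterators yield.
def _example_dedup_across_children(child_entry_lists):
    seen_keys = set()
    for entries in child_entry_lists:
        for node in entries:
            if node[1] not in seen_keys:
                seen_keys.add(node[1])
                yield node

# e.g. list(_example_dedup_across_children(
#          [[('idx0', ('k1',), 'v')],
#           [('idx1', ('k1',), 'v'), ('idx1', ('k2',), 'v')]]))
#      -> the ('k1',) node from idx0 and the ('k2',) node from idx1.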
There is no defined order for the result iteration - it will be in the most efficient order for the index. """ seen_keys = set() while True: try: for index in self._indices: for node in index.iter_all_entries(): if node[1] not in seen_keys: yield node seen_keys.add(node[1]) return except errors.NoSuchFile: self._reload_or_raise() def iter_entries(self, keys): """Iterate over keys within the index. Duplicate keys across child indices are presumed to have the same value and are only reported once. :param keys: An iterable providing the keys to be retrieved. :return: An iterable of (index, key, reference_lists, value). There is no defined order for the result iteration - it will be in the most efficient order for the index. """ keys = set(keys) hit_indices = [] while True: try: for index in self._indices: if not keys: break index_hit = False for node in index.iter_entries(keys): keys.remove(node[1]) yield node index_hit = True if index_hit: hit_indices.append(index) break except errors.NoSuchFile: self._reload_or_raise() self._move_to_front(hit_indices) def iter_entries_prefix(self, keys): """Iterate over keys within the index using prefix matching. Duplicate keys across child indices are presumed to have the same value and are only reported once. Prefix matching is applied within the tuple of a key, not to within the bytestring of each key element. e.g. if you have the keys ('foo', 'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then only the former key is returned. :param keys: An iterable providing the key prefixes to be retrieved. Each key prefix takes the form of a tuple the length of a key, but with the last N elements 'None' rather than a regular bytestring. The first element cannot be 'None'. :return: An iterable as per iter_all_entries, but restricted to the keys with a matching prefix to those supplied. No additional keys will be returned, and every match that is in the index will be returned. """ keys = set(keys) if not keys: return seen_keys = set() hit_indices = [] while True: try: for index in self._indices: index_hit = False for node in index.iter_entries_prefix(keys): if node[1] in seen_keys: continue seen_keys.add(node[1]) yield node index_hit = True if index_hit: hit_indices.append(index) break except errors.NoSuchFile: self._reload_or_raise() self._move_to_front(hit_indices) def _move_to_front(self, hit_indices): """Rearrange self._indices so that hit_indices are first. Order is maintained as much as possible, e.g. the first unhit index will be the first index in _indices after the hit_indices, and the hit_indices will be present in exactly the order they are passed to _move_to_front. _move_to_front propagates to all objects in self._sibling_indices by calling _move_to_front_by_name. """ if self._indices[:len(hit_indices)] == hit_indices: # The 'hit_indices' are already at the front (and in the same # order), no need to re-order return hit_names = self._move_to_front_by_index(hit_indices) for sibling_idx in self._sibling_indices: sibling_idx._move_to_front_by_name(hit_names) def _move_to_front_by_index(self, hit_indices): """Core logic for _move_to_front. Returns a list of names corresponding to the hit_indices param. 
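# --- Illustrative sketch (not part of bzrlib): the reordering effect of the
# _move_to_front machinery described above, shown on plain name lists. Hit
# indices move to the front in the order they were hit; everything else keeps
# its previous relative order. The names are invented.
def _example_move_to_front(names, hit_names):
    hits = [n for n in hit_names if n in names]
    rest = [n for n in names if n not in hits]
    return hits + rest

# e.g. _example_move_to_front(['pack-a', 'pack-b', 'pack-c'], ['pack-c'])
#      -> ['pack-c', 'pack-a', 'pack-b']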
""" indices_info = zip(self._index_names, self._indices) if 'index' in debug.debug_flags: trace.mutter('CombinedGraphIndex reordering: currently %r, ' 'promoting %r', indices_info, hit_indices) hit_names = [] unhit_names = [] new_hit_indices = [] unhit_indices = [] for offset, (name, idx) in enumerate(indices_info): if idx in hit_indices: hit_names.append(name) new_hit_indices.append(idx) if len(new_hit_indices) == len(hit_indices): # We've found all of the hit entries, everything else is # unhit unhit_names.extend(self._index_names[offset+1:]) unhit_indices.extend(self._indices[offset+1:]) break else: unhit_names.append(name) unhit_indices.append(idx) self._indices = new_hit_indices + unhit_indices self._index_names = hit_names + unhit_names if 'index' in debug.debug_flags: trace.mutter('CombinedGraphIndex reordered: %r', self._indices) return hit_names def _move_to_front_by_name(self, hit_names): """Moves indices named by 'hit_names' to front of the search order, as described in _move_to_front. """ # Translate names to index instances, and then call # _move_to_front_by_index. indices_info = zip(self._index_names, self._indices) hit_indices = [] for name, idx in indices_info: if name in hit_names: hit_indices.append(idx) self._move_to_front_by_index(hit_indices) def find_ancestry(self, keys, ref_list_num): """Find the complete ancestry for the given set of keys. Note that this is a whole-ancestry request, so it should be used sparingly. :param keys: An iterable of keys to look for :param ref_list_num: The reference list which references the parents we care about. :return: (parent_map, missing_keys) """ # XXX: make this call _move_to_front? missing_keys = set() parent_map = {} keys_to_lookup = set(keys) generation = 0 while keys_to_lookup: # keys that *all* indexes claim are missing, stop searching them generation += 1 all_index_missing = None # print 'gen\tidx\tsub\tn_keys\tn_pmap\tn_miss' # print '%4d\t\t\t%4d\t%5d\t%5d' % (generation, len(keys_to_lookup), # len(parent_map), # len(missing_keys)) for index_idx, index in enumerate(self._indices): # TODO: we should probably be doing something with # 'missing_keys' since we've already determined that # those revisions have not been found anywhere index_missing_keys = set() # Find all of the ancestry we can from this index # keep looking until the search_keys set is empty, which means # things we didn't find should be in index_missing_keys search_keys = keys_to_lookup sub_generation = 0 # print ' \t%2d\t\t%4d\t%5d\t%5d' % ( # index_idx, len(search_keys), # len(parent_map), len(index_missing_keys)) while search_keys: sub_generation += 1 # TODO: ref_list_num should really be a parameter, since # CombinedGraphIndex does not know what the ref lists # mean. 
search_keys = index._find_ancestors(search_keys, ref_list_num, parent_map, index_missing_keys) # print ' \t \t%2d\t%4d\t%5d\t%5d' % ( # sub_generation, len(search_keys), # len(parent_map), len(index_missing_keys)) # Now set whatever was missing to be searched in the next index keys_to_lookup = index_missing_keys if all_index_missing is None: all_index_missing = set(index_missing_keys) else: all_index_missing.intersection_update(index_missing_keys) if not keys_to_lookup: break if all_index_missing is None: # There were no indexes, so all search keys are 'missing' missing_keys.update(keys_to_lookup) keys_to_lookup = None else: missing_keys.update(all_index_missing) keys_to_lookup.difference_update(all_index_missing) return parent_map, missing_keys def key_count(self): """Return an estimate of the number of keys in this index. For CombinedGraphIndex this is approximated by the sum of the keys of the child indices. As child indices may have duplicate keys this can have a maximum error of the number of child indices * largest number of keys in any index. """ while True: try: return sum((index.key_count() for index in self._indices), 0) except errors.NoSuchFile: self._reload_or_raise() missing_keys = _missing_keys_from_parent_map def _reload_or_raise(self): """We just got a NoSuchFile exception. Try to reload the indices, if it fails, just raise the current exception. """ if self._reload_func is None: raise exc_type, exc_value, exc_traceback = sys.exc_info() trace.mutter('Trying to reload after getting exception: %s', exc_value) if not self._reload_func(): # We tried to reload, but nothing changed, so we fail anyway trace.mutter('_reload_func indicated nothing has changed.' ' Raising original exception.') raise exc_type, exc_value, exc_traceback def set_sibling_indices(self, sibling_combined_graph_indices): """Set the CombinedGraphIndex objects to reorder after reordering self. """ self._sibling_indices = sibling_combined_graph_indices def validate(self): """Validate that everything in the index can be accessed.""" while True: try: for index in self._indices: index.validate() return except errors.NoSuchFile: self._reload_or_raise() class InMemoryGraphIndex(GraphIndexBuilder): """A GraphIndex which operates entirely out of memory and is mutable. This is designed to allow the accumulation of GraphIndex entries during a single write operation, where the accumulated entries need to be immediately available - for example via a CombinedGraphIndex. """ def add_nodes(self, nodes): """Add nodes to the index. :param nodes: An iterable of (key, node_refs, value) entries to add. """ if self.reference_lists: for (key, value, node_refs) in nodes: self.add_node(key, value, node_refs) else: for (key, value) in nodes: self.add_node(key, value) def iter_all_entries(self): """Iterate over all keys within the index :return: An iterable of (index, key, reference_lists, value). There is no defined order for the result iteration - it will be in the most efficient order for the index (in this case dictionary hash order). """ if 'evil' in debug.debug_flags: trace.mutter_callsite(3, "iter_all_entries scales with size of history.") if self.reference_lists: for key, (absent, references, value) in self._nodes.iteritems(): if not absent: yield self, key, value, references else: for key, (absent, references, value) in self._nodes.iteritems(): if not absent: yield self, key, value def iter_entries(self, keys): """Iterate over keys within the index. :param keys: An iterable providing the keys to be retrieved. 
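# --- Illustrative sketch (not part of bzrlib's test suite): the accumulation
# pattern the InMemoryGraphIndex docstring above describes -- entries added
# during a write become queryable immediately, e.g. by stacking the in-memory
# index in front of already-written ones via CombinedGraphIndex. The
# constructor argument (reference_lists=1) and the key/value are assumptions
# for the example.
def _example_accumulate_in_memory(finished_indices):
    building = InMemoryGraphIndex(reference_lists=1)
    combined = CombinedGraphIndex([building] + list(finished_indices))
    building.add_node(('rev-new',), 'value', ([],))
    # The new node is visible through the combined view straight away.
    return list(combined.iter_entries([('rev-new',)]))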
:return: An iterable of (index, key, value, reference_lists). There is no defined order for the result iteration - it will be in the most efficient order for the index (keys iteration order in this case). """ # Note: See BTreeBuilder.iter_entries for an explanation of why we # aren't using set().intersection() here nodes = self._nodes keys = [key for key in keys if key in nodes] if self.reference_lists: for key in keys: node = nodes[key] if not node[0]: yield self, key, node[2], node[1] else: for key in keys: node = nodes[key] if not node[0]: yield self, key, node[2] def iter_entries_prefix(self, keys): """Iterate over keys within the index using prefix matching. Prefix matching is applied within the tuple of a key, not to within the bytestring of each key element. e.g. if you have the keys ('foo', 'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then only the former key is returned. :param keys: An iterable providing the key prefixes to be retrieved. Each key prefix takes the form of a tuple the length of a key, but with the last N elements 'None' rather than a regular bytestring. The first element cannot be 'None'. :return: An iterable as per iter_all_entries, but restricted to the keys with a matching prefix to those supplied. No additional keys will be returned, and every match that is in the index will be returned. """ # XXX: To much duplication with the GraphIndex class; consider finding # a good place to pull out the actual common logic. keys = set(keys) if not keys: return if self._key_length == 1: for key in keys: # sanity check if key[0] is None: raise errors.BadIndexKey(key) if len(key) != self._key_length: raise errors.BadIndexKey(key) node = self._nodes[key] if node[0]: continue if self.reference_lists: yield self, key, node[2], node[1] else: yield self, key, node[2] return nodes_by_key = self._get_nodes_by_key() for key in keys: # sanity check if key[0] is None: raise errors.BadIndexKey(key) if len(key) != self._key_length: raise errors.BadIndexKey(key) # find what it refers to: key_dict = nodes_by_key elements = list(key) # find the subdict to return try: while len(elements) and elements[0] is not None: key_dict = key_dict[elements[0]] elements.pop(0) except KeyError: # a non-existant lookup. continue if len(elements): dicts = [key_dict] while dicts: key_dict = dicts.pop(-1) # can't be empty or would not exist item, value = key_dict.iteritems().next() if type(value) == dict: # push keys dicts.extend(key_dict.itervalues()) else: # yield keys for value in key_dict.itervalues(): yield (self, ) + value else: yield (self, ) + key_dict def key_count(self): """Return an estimate of the number of keys in this index. For InMemoryGraphIndex the estimate is exact. """ return len(self._nodes) - len(self._absent_keys) def validate(self): """In memory index's have no known corruption at the moment.""" class GraphIndexPrefixAdapter(object): """An adapter between GraphIndex with different key lengths. Queries against this will emit queries against the adapted Graph with the prefix added, queries for all items use iter_entries_prefix. The returned nodes will have their keys and node references adjusted to remove the prefix. Finally, an add_nodes_callback can be supplied - when called the nodes and references being added will have prefix prepended. 
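# --- Illustrative sketch (not part of bzrlib): the key translation performed
# by the adapter described above. Writes prepend the prefix to the key and to
# every reference; reads strip it again, so callers only ever see the short
# keys. The prefix and keys are invented.
def _example_prefix_translation():
    prefix = ('pack-1',)
    short_key, refs = ('rev-a',), ((('rev-0',),),)
    # outgoing (add_node direction): prepend the prefix everywhere
    long_key = prefix + short_key
    long_refs = tuple(tuple(prefix + r for r in ref_list) for ref_list in refs)
    # incoming (_strip_prefix direction): drop len(prefix) leading elements
    plen = len(prefix)
    stripped = (long_key[plen:],
                tuple(tuple(r[plen:] for r in ref_list)
                      for ref_list in long_refs))
    return (long_key, long_refs), stripped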
""" def __init__(self, adapted, prefix, missing_key_length, add_nodes_callback=None): """Construct an adapter against adapted with prefix.""" self.adapted = adapted self.prefix_key = prefix + (None,)*missing_key_length self.prefix = prefix self.prefix_len = len(prefix) self.add_nodes_callback = add_nodes_callback def add_nodes(self, nodes): """Add nodes to the index. :param nodes: An iterable of (key, node_refs, value) entries to add. """ # save nodes in case its an iterator nodes = tuple(nodes) translated_nodes = [] try: # Add prefix_key to each reference node_refs is a tuple of tuples, # so split it apart, and add prefix_key to the internal reference for (key, value, node_refs) in nodes: adjusted_references = ( tuple(tuple(self.prefix + ref_node for ref_node in ref_list) for ref_list in node_refs)) translated_nodes.append((self.prefix + key, value, adjusted_references)) except ValueError: # XXX: TODO add an explicit interface for getting the reference list # status, to handle this bit of user-friendliness in the API more # explicitly. for (key, value) in nodes: translated_nodes.append((self.prefix + key, value)) self.add_nodes_callback(translated_nodes) def add_node(self, key, value, references=()): """Add a node to the index. :param key: The key. keys are non-empty tuples containing as many whitespace-free utf8 bytestrings as the key length defined for this index. :param references: An iterable of iterables of keys. Each is a reference to another key. :param value: The value to associate with the key. It may be any bytes as long as it does not contain \0 or \n. """ self.add_nodes(((key, value, references), )) def _strip_prefix(self, an_iter): """Strip prefix data from nodes and return it.""" for node in an_iter: # cross checks if node[1][:self.prefix_len] != self.prefix: raise errors.BadIndexData(self) for ref_list in node[3]: for ref_node in ref_list: if ref_node[:self.prefix_len] != self.prefix: raise errors.BadIndexData(self) yield node[0], node[1][self.prefix_len:], node[2], ( tuple(tuple(ref_node[self.prefix_len:] for ref_node in ref_list) for ref_list in node[3])) def iter_all_entries(self): """Iterate over all keys within the index iter_all_entries is implemented against the adapted index using iter_entries_prefix. :return: An iterable of (index, key, reference_lists, value). There is no defined order for the result iteration - it will be in the most efficient order for the index (in this case dictionary hash order). """ return self._strip_prefix(self.adapted.iter_entries_prefix([self.prefix_key])) def iter_entries(self, keys): """Iterate over keys within the index. :param keys: An iterable providing the keys to be retrieved. :return: An iterable of (index, key, value, reference_lists). There is no defined order for the result iteration - it will be in the most efficient order for the index (keys iteration order in this case). """ return self._strip_prefix(self.adapted.iter_entries( self.prefix + key for key in keys)) def iter_entries_prefix(self, keys): """Iterate over keys within the index using prefix matching. Prefix matching is applied within the tuple of a key, not to within the bytestring of each key element. e.g. if you have the keys ('foo', 'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then only the former key is returned. :param keys: An iterable providing the key prefixes to be retrieved. Each key prefix takes the form of a tuple the length of a key, but with the last N elements 'None' rather than a regular bytestring. 
The first element cannot be 'None'. :return: An iterable as per iter_all_entries, but restricted to the keys with a matching prefix to those supplied. No additional keys will be returned, and every match that is in the index will be returned. """ return self._strip_prefix(self.adapted.iter_entries_prefix( self.prefix + key for key in keys)) def key_count(self): """Return an estimate of the number of keys in this index. For GraphIndexPrefixAdapter this is relatively expensive - key iteration with the prefix is done. """ return len(list(self.iter_all_entries())) def validate(self): """Call the adapted's validate.""" self.adapted.validate() bzr-2.7.0/bzrlib/info.py0000644000000000000000000004532011720446360013243 0ustar 00000000000000# Copyright (C) 2005-2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import __all__ = ['show_bzrdir_info'] from cStringIO import StringIO import time import sys from bzrlib import ( bzrdir, controldir, errors, hooks as _mod_hooks, osutils, urlutils, ) from bzrlib.errors import (NoWorkingTree, NotBranchError, NoRepositoryPresent, NotLocalUrl) from bzrlib.missing import find_unmerged def plural(n, base='', pl=None): if n == 1: return base elif pl is not None: return pl else: return 's' class LocationList(object): def __init__(self, base_path): self.locs = [] self.base_path = base_path def add_url(self, label, url): """Add a URL to the list, converting it to a path if possible""" if url is None: return try: path = urlutils.local_path_from_url(url) except errors.InvalidURL: self.locs.append((label, url)) else: self.add_path(label, path) def add_path(self, label, path): """Add a path, converting it to a relative path if possible""" try: path = osutils.relpath(self.base_path, path) except errors.PathNotChild: pass else: if path == '': path = '.' 
if path != '/': path = path.rstrip('/') self.locs.append((label, path)) def get_lines(self): max_len = max(len(l) for l, u in self.locs) return [" %*s: %s\n" % (max_len, l, u) for l, u in self.locs ] def gather_location_info(repository=None, branch=None, working=None, control=None): locs = {} if branch is not None: branch_path = branch.user_url master_path = branch.get_bound_location() if master_path is None: master_path = branch_path else: branch_path = None master_path = None try: if control is not None and control.get_branch_reference(): locs['checkout of branch'] = control.get_branch_reference() except NotBranchError: pass if working: working_path = working.user_url if working_path != branch_path: locs['light checkout root'] = working_path if master_path != branch_path: if repository.is_shared(): locs['repository checkout root'] = branch_path else: locs['checkout root'] = branch_path if working_path != master_path: locs['checkout of branch'] = master_path elif repository.is_shared(): locs['repository branch'] = branch_path elif branch_path is not None: # standalone locs['branch root'] = branch_path else: working_path = None if repository is not None and repository.is_shared(): # lightweight checkout of branch in shared repository if branch_path is not None: locs['repository branch'] = branch_path elif branch_path is not None: # standalone locs['branch root'] = branch_path elif repository is not None: locs['repository'] = repository.user_url elif control is not None: locs['control directory'] = control.user_url else: # Really, at least a control directory should be # passed in for this method to be useful. pass if master_path != branch_path: locs['bound to branch'] = master_path if repository is not None and repository.is_shared(): # lightweight checkout of branch in shared repository locs['shared repository'] = repository.user_url order = ['control directory', 'light checkout root', 'repository checkout root', 'checkout root', 'checkout of branch', 'shared repository', 'repository', 'repository branch', 'branch root', 'bound to branch'] return [(n, locs[n]) for n in order if n in locs] def _show_location_info(locs, outfile): """Show known locations for working, branch and repository.""" outfile.write('Location:\n') path_list = LocationList(osutils.getcwd()) for name, loc in locs: path_list.add_url(name, loc) outfile.writelines(path_list.get_lines()) def _gather_related_branches(branch): locs = LocationList(osutils.getcwd()) locs.add_url('public branch', branch.get_public_branch()) locs.add_url('push branch', branch.get_push_location()) locs.add_url('parent branch', branch.get_parent()) locs.add_url('submit branch', branch.get_submit_branch()) try: locs.add_url('stacked on', branch.get_stacked_on_url()) except (errors.UnstackableBranchFormat, errors.UnstackableRepositoryFormat, errors.NotStacked): pass return locs def _show_related_info(branch, outfile): """Show parent and push location of branch.""" locs = _gather_related_branches(branch) if len(locs.locs) > 0: outfile.write('\n') outfile.write('Related branches:\n') outfile.writelines(locs.get_lines()) def _show_control_dir_info(control, outfile): """Show control dir information.""" if control._format.colocated_branches: outfile.write('\n') outfile.write('Control directory:\n') outfile.write(' %d branches\n' % len(control.list_branches())) def _show_format_info(control=None, repository=None, branch=None, working=None, outfile=None): """Show known formats for control, working, branch and repository.""" outfile.write('\n') 
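# --- Illustrative sketch (not part of bzrlib): the right-aligned location
# output produced by LocationList.get_lines() above, plus the plural() helper
# used throughout the statistics output. The labels and paths are invented.
def _example_location_lines():
    locs = [('branch root', '.'), ('parent branch', '../trunk')]
    max_len = max(len(label) for label, url in locs)
    lines = [" %*s: %s\n" % (max_len, label, url) for label, url in locs]
    # plural() picks the suffix for count-based messages:
    #   '1 revision' vs '2 revisions', '1 subdirectory' vs '2 subdirectories'
    message = '%d revision%s' % (2, plural(2))
    return lines, message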
outfile.write('Format:\n') if control: outfile.write(' control: %s\n' % control._format.get_format_description()) if working: outfile.write(' working tree: %s\n' % working._format.get_format_description()) if branch: outfile.write(' branch: %s\n' % branch._format.get_format_description()) if repository: outfile.write(' repository: %s\n' % repository._format.get_format_description()) def _show_locking_info(repository=None, branch=None, working=None, outfile=None): """Show locking status of working, branch and repository.""" if (repository and repository.get_physical_lock_status() or (branch and branch.get_physical_lock_status()) or (working and working.get_physical_lock_status())): outfile.write('\n') outfile.write('Lock status:\n') if working: if working.get_physical_lock_status(): status = 'locked' else: status = 'unlocked' outfile.write(' working tree: %s\n' % status) if branch: if branch.get_physical_lock_status(): status = 'locked' else: status = 'unlocked' outfile.write(' branch: %s\n' % status) if repository: if repository.get_physical_lock_status(): status = 'locked' else: status = 'unlocked' outfile.write(' repository: %s\n' % status) def _show_missing_revisions_branch(branch, outfile): """Show missing master revisions in branch.""" # Try with inaccessible branch ? master = branch.get_master_branch() if master: local_extra, remote_extra = find_unmerged(branch, master) if remote_extra: outfile.write('\n') outfile.write(('Branch is out of date: missing %d ' 'revision%s.\n') % (len(remote_extra), plural(len(remote_extra)))) def _show_missing_revisions_working(working, outfile): """Show missing revisions in working tree.""" branch = working.branch basis = working.basis_tree() try: branch_revno, branch_last_revision = branch.last_revision_info() except errors.UnsupportedOperation: return try: tree_last_id = working.get_parent_ids()[0] except IndexError: tree_last_id = None if branch_revno and tree_last_id != branch_last_revision: tree_last_revno = branch.revision_id_to_revno(tree_last_id) missing_count = branch_revno - tree_last_revno outfile.write('\n') outfile.write(('Working tree is out of date: missing %d ' 'revision%s.\n') % (missing_count, plural(missing_count))) def _show_working_stats(working, outfile): """Show statistics about a working tree.""" basis = working.basis_tree() delta = working.changes_from(basis, want_unchanged=True) outfile.write('\n') outfile.write('In the working tree:\n') outfile.write(' %8s unchanged\n' % len(delta.unchanged)) outfile.write(' %8d modified\n' % len(delta.modified)) outfile.write(' %8d added\n' % len(delta.added)) outfile.write(' %8d removed\n' % len(delta.removed)) outfile.write(' %8d renamed\n' % len(delta.renamed)) ignore_cnt = unknown_cnt = 0 for path in working.extras(): if working.is_ignored(path): ignore_cnt += 1 else: unknown_cnt += 1 outfile.write(' %8d unknown\n' % unknown_cnt) outfile.write(' %8d ignored\n' % ignore_cnt) dir_cnt = 0 root_id = working.get_root_id() for path, entry in working.iter_entries_by_dir(): if entry.kind == 'directory' and entry.file_id != root_id: dir_cnt += 1 outfile.write(' %8d versioned %s\n' % (dir_cnt, plural(dir_cnt, 'subdirectory', 'subdirectories'))) def _show_branch_stats(branch, verbose, outfile): """Show statistics about a branch.""" try: revno, head = branch.last_revision_info() except errors.UnsupportedOperation: return {} outfile.write('\n') outfile.write('Branch history:\n') outfile.write(' %8d revision%s\n' % (revno, plural(revno))) stats = branch.repository.gather_stats(head, 
committers=verbose) if verbose: committers = stats['committers'] outfile.write(' %8d committer%s\n' % (committers, plural(committers))) if revno: timestamp, timezone = stats['firstrev'] age = int((time.time() - timestamp) / 3600 / 24) outfile.write(' %8d day%s old\n' % (age, plural(age))) outfile.write(' first revision: %s\n' % osutils.format_date(timestamp, timezone)) timestamp, timezone = stats['latestrev'] outfile.write(' latest revision: %s\n' % osutils.format_date(timestamp, timezone)) return stats def _show_repository_info(repository, outfile): """Show settings of a repository.""" if repository.make_working_trees(): outfile.write('\n') outfile.write('Create working tree for new branches inside ' 'the repository.\n') def _show_repository_stats(repository, stats, outfile): """Show statistics about a repository.""" f = StringIO() if 'revisions' in stats: revisions = stats['revisions'] f.write(' %8d revision%s\n' % (revisions, plural(revisions))) if 'size' in stats: f.write(' %8d KiB\n' % (stats['size']/1024)) for hook in hooks['repository']: hook(repository, stats, f) if f.getvalue() != "": outfile.write('\n') outfile.write('Repository:\n') outfile.write(f.getvalue()) def show_bzrdir_info(a_bzrdir, verbose=False, outfile=None): """Output to stdout the 'info' for a_bzrdir.""" if outfile is None: outfile = sys.stdout try: tree = a_bzrdir.open_workingtree( recommend_upgrade=False) except (NoWorkingTree, NotLocalUrl, NotBranchError): tree = None try: branch = a_bzrdir.open_branch(name="") except NotBranchError: branch = None try: repository = a_bzrdir.open_repository() except NoRepositoryPresent: lockable = None repository = None else: lockable = repository else: repository = branch.repository lockable = branch else: branch = tree.branch repository = branch.repository lockable = tree if lockable is not None: lockable.lock_read() try: show_component_info(a_bzrdir, repository, branch, tree, verbose, outfile) finally: if lockable is not None: lockable.unlock() def show_component_info(control, repository, branch=None, working=None, verbose=1, outfile=None): """Write info about all bzrdir components to stdout""" if outfile is None: outfile = sys.stdout if verbose is False: verbose = 1 if verbose is True: verbose = 2 layout = describe_layout(repository, branch, working, control) format = describe_format(control, repository, branch, working) outfile.write("%s (format: %s)\n" % (layout, format)) _show_location_info( gather_location_info(control=control, repository=repository, branch=branch, working=working), outfile) if branch is not None: _show_related_info(branch, outfile) if verbose == 0: return _show_format_info(control, repository, branch, working, outfile) _show_locking_info(repository, branch, working, outfile) _show_control_dir_info(control, outfile) if branch is not None: _show_missing_revisions_branch(branch, outfile) if working is not None: _show_missing_revisions_working(working, outfile) _show_working_stats(working, outfile) elif branch is not None: _show_missing_revisions_branch(branch, outfile) if branch is not None: show_committers = verbose >= 2 stats = _show_branch_stats(branch, show_committers, outfile) elif repository is not None: stats = repository.gather_stats() if branch is None and working is None and repository is not None: _show_repository_info(repository, outfile) if repository is not None: _show_repository_stats(repository, stats, outfile) def describe_layout(repository=None, branch=None, tree=None, control=None): """Convert a control directory layout into a 
user-understandable term Common outputs include "Standalone tree", "Repository branch" and "Checkout". Uncommon outputs include "Unshared repository with trees" and "Empty control directory" """ if branch is None and control is not None: try: branch_reference = control.get_branch_reference() except NotBranchError: pass else: if branch_reference is not None: return "Dangling branch reference" if repository is None: return 'Empty control directory' if branch is None and tree is None: if repository.is_shared(): phrase = 'Shared repository' else: phrase = 'Unshared repository' extra = [] if repository.make_working_trees(): extra.append('trees') if len(control.get_branches()) > 0: extra.append('colocated branches') if extra: phrase += ' with ' + " and ".join(extra) return phrase else: if repository.is_shared(): independence = "Repository " else: independence = "Standalone " if tree is not None: phrase = "tree" else: phrase = "branch" if branch is None and tree is not None: phrase = "branchless tree" else: if (tree is not None and tree.user_url != branch.user_url): independence = '' phrase = "Lightweight checkout" elif branch.get_bound_location() is not None: if independence == 'Standalone ': independence = '' if tree is None: phrase = "Bound branch" else: phrase = "Checkout" if independence != "": phrase = phrase.lower() return "%s%s" % (independence, phrase) def describe_format(control, repository, branch, tree): """Determine the format of an existing control directory Several candidates may be found. If so, the names are returned as a single string, separated by ' or '. If no matching candidate is found, "unnamed" is returned. """ candidates = [] if (branch is not None and tree is not None and branch.user_url != tree.user_url): branch = None repository = None non_aliases = set(controldir.format_registry.keys()) non_aliases.difference_update(controldir.format_registry.aliases()) for key in non_aliases: format = controldir.format_registry.make_bzrdir(key) if isinstance(format, bzrdir.BzrDirMetaFormat1): if (tree and format.workingtree_format != tree._format): continue if (branch and format.get_branch_format() != branch._format): continue if (repository and format.repository_format != repository._format): continue if format.__class__ is not control._format.__class__: continue candidates.append(key) if len(candidates) == 0: return 'unnamed' candidates.sort() new_candidates = [c for c in candidates if not controldir.format_registry.get_info(c).hidden] if len(new_candidates) > 0: # If there are any non-hidden formats that match, only return those to # avoid listing hidden formats except when only a hidden format will # do. candidates = new_candidates return ' or '.join(candidates) class InfoHooks(_mod_hooks.Hooks): """Hooks for the info command.""" def __init__(self): super(InfoHooks, self).__init__("bzrlib.info", "hooks") self.add_hook('repository', "Invoked when displaying the statistics for a repository. " "repository is called with a statistics dictionary as returned " "by the repository and a file-like object to write to.", (1, 15)) hooks = InfoHooks() bzr-2.7.0/bzrlib/inspect_for_copy.py0000644000000000000000000000507711673635356015676 0ustar 00000000000000# Copyright (C) 2005, 2006 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
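# --- Editorial sketch, not part of bzrlib ---
# The info code above is normally driven through show_bzrdir_info(), which
# opens the tree/branch/repository, takes a read lock and writes the report.
# A minimal invocation, assuming bzrlib is importable and `location` is a bzr
# control directory; the _example_* helper name is ours:
def _example_show_info(location='.'):
    import sys
    from bzrlib.bzrdir import BzrDir
    from bzrlib.info import show_bzrdir_info
    a_bzrdir = BzrDir.open(location)
    # verbose=True adds the format, lock status and statistics sections.
    show_bzrdir_info(a_bzrdir, verbose=True, outfile=sys.stdout)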
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """A version of inspect that includes what 'copy' needs. Importing the python standard module 'copy' is far more expensive than it needs to be, because copy imports 'inspect' which imports 'tokenize'. And 'copy' only needs 2 small functions out of 'inspect', but has to load all of 'tokenize', which makes it horribly slow. This module is designed to use tricky hacks in import rules, to avoid this overhead. """ from __future__ import absolute_import #### # These are the only 2 functions that 'copy' needs from 'inspect' # As you can see, they are quite trivial, and don't justify the # 40ms spent to import 'inspect' because it is importing 'tokenize' # These are copied verbatim from the python standard library. # ----------------------------------------------------------- class helpers def _searchbases(cls, accum): # Simulate the "classic class" search order. if cls in accum: return accum.append(cls) for base in cls.__bases__: _searchbases(base, accum) def getmro(cls): "Return tuple of base classes (including cls) in method resolution order." if hasattr(cls, "__mro__"): return cls.__mro__ else: result = [] _searchbases(cls, result) return tuple(result) def import_copy_with_hacked_inspect(): """Import the 'copy' module with a hacked 'inspect' module""" # We don't actually care about 'getmro' but we need to pass # something in the list so that we get the direct module, # rather than getting the base module import sys # Don't hack around if 'inspect' already exists if 'inspect' in sys.modules: import copy return mod = __import__('bzrlib.inspect_for_copy', globals(), locals(), ['getmro']) sys.modules['inspect'] = mod try: import copy finally: del sys.modules['inspect'] bzr-2.7.0/bzrlib/inter.py0000644000000000000000000001103211673635356013436 0ustar 00000000000000# Copyright (C) 2006 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Inter-object utility class.""" from __future__ import absolute_import from bzrlib.errors import NoCompatibleInter class InterObject(object): """This class represents operations taking place between two objects. Its instances have methods like join or copy_content or fetch, and contain references to the source and target objects these operations can be carried out between. Often we will provide convenience methods on the objects which carry out operations with another of similar type - they will always forward to a subclass of InterObject - i.e. 
InterVersionedFile.get(other).method_name(parameters). If the source and target objects implement the locking protocol - lock_read, lock_write, unlock, then the InterObject's lock_read, lock_write and unlock methods may be used (optionally in conjunction with the needs_read_lock and needs_write_lock decorators.) When looking for an inter, the most recently registered types are tested first. So typically the most generic and slowest InterObjects should be registered first. """ # _optimisers = list() # Each concrete InterObject type should have its own optimisers list. def __init__(self, source, target): """Construct a default InterObject instance. Please use 'get'. Only subclasses of InterObject should call InterObject.__init__ - clients should call InterFOO.get where FOO is the base type of the objects they are interacting between. I.e. InterVersionedFile or InterRepository. get() is a convenience class method which will create an optimised InterFOO if possible. """ self.source = source self.target = target def _double_lock(self, lock_source, lock_target): """Take out two locks, rolling back the first if the second throws.""" lock_source() try: lock_target() except Exception: # we want to ensure that we don't leave source locked by mistake. # and any error on target should not confuse source. self.source.unlock() raise @classmethod def get(klass, source, target): """Retrieve a Inter worker object for these objects. :param source: the object to be the 'source' member of the InterObject instance. :param target: the object to be the 'target' member of the InterObject instance. If an optimised worker exists it will be used otherwise a default Inter worker instance will be created. """ for provider in reversed(klass._optimisers): if provider.is_compatible(source, target): return provider(source, target) raise NoCompatibleInter(source, target) def lock_read(self): """Take out a logical read lock. This will lock the source branch and the target branch. The source gets a read lock and the target a read lock. """ self._double_lock(self.source.lock_read, self.target.lock_read) def lock_write(self): """Take out a logical write lock. This will lock the source branch and the target branch. The source gets a read lock and the target a write lock. """ self._double_lock(self.source.lock_read, self.target.lock_write) @classmethod def register_optimiser(klass, optimiser): """Register an InterObject optimiser.""" klass._optimisers.append(optimiser) def unlock(self): """Release the locks on source and target.""" try: self.target.unlock() finally: self.source.unlock() @classmethod def unregister_optimiser(klass, optimiser): """Unregister an InterObject optimiser.""" klass._optimisers.remove(optimiser) bzr-2.7.0/bzrlib/intset.py0000644000000000000000000001237211673403246013622 0ustar 00000000000000# Copyright (C) 2005 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
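# --- Editorial sketch, not part of bzrlib ---
# The InterObject pattern defined above: a concrete Inter class keeps its own
# _optimisers list, optimisers register themselves, and get() returns the most
# recently registered compatible one, raising NoCompatibleInter otherwise.
# A toy example; InterThing, InterFastThing and the 'flavour' attribute are
# made up for illustration:
from bzrlib.inter import InterObject

class InterThing(InterObject):
    """Operations between two 'thing' objects."""
    _optimisers = []    # each concrete InterObject type keeps its own list

class InterFastThing(InterThing):
    @staticmethod
    def is_compatible(source, target):
        return getattr(source, 'flavour', None) == getattr(target, 'flavour', None)

InterThing.register_optimiser(InterFastThing)
# InterThing.get(source, target) now returns an InterFastThing for compatible
# pairs and raises NoCompatibleInter for anything else.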
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import # Author: Martin Pool # Somewhat surprisingly, it turns out that this is much slower than # simply storing the ints in a set() type. Python's performance model # is very different to that of C. class IntSet(Exception): """Faster set-like class storing only whole numbers. Despite the name this stores long integers happily, but negative values are not allowed. >>> a = IntSet([0, 2, 5]) >>> bool(a) True >>> 2 in a True >>> 4 in a False >>> a.add(4) >>> 4 in a True >>> b = IntSet() >>> not b True >>> b.add(10) >>> 10 in a False >>> a.update(b) >>> 10 in a True >>> a.update(range(5)) >>> 3 in a True Being a set, duplicates are ignored: >>> a = IntSet() >>> a.add(10) >>> a.add(10) >>> 10 in a True >>> list(a) [10] """ __slots__ = ['_val'] def __init__(self, values=None, bitmask=0L): """Create a new intset. values If specified, an initial collection of values. """ self._val = bitmask if values is not None: self.update(values) def __nonzero__(self): """IntSets are false if empty, otherwise True. >>> bool(IntSet()) False >>> bool(IntSet([0])) True """ return bool(self._val) def __len__(self): """Number of elements in set. >>> len(IntSet(xrange(20000))) 20000 """ v = self._val c = 0 while v: if v & 1: c += 1 v = v >> 1 return c def __and__(self, other): """Set intersection. >>> a = IntSet(range(10)) >>> len(a) 10 >>> b = a & a >>> b == a True >>> a = a & IntSet([5, 7, 11, 13]) >>> list(a) [5, 7] """ if not isinstance(other, IntSet): raise NotImplementedError(type(other)) return IntSet(bitmask=(self._val & other._val)) def __or__(self, other): """Set union. >>> a = IntSet(range(10)) | IntSet([5, 15, 25]) >>> len(a) 12 """ if not isinstance(other, IntSet): raise NotImplementedError(type(other)) return IntSet(bitmask=(self._val | other._val)) def __eq__(self, other): """Comparison. >>> IntSet(range(3)) == IntSet([2, 0, 1]) True """ if isinstance(other, IntSet): return self._val == other._val else: return False def __ne__(self, other): return not self.__eq__(other) def __contains__(self, i): return self._val & (1L << i) def __iter__(self): """Return contents of set. >>> list(IntSet()) [] >>> list(IntSet([0, 1, 5, 7])) [0, 1, 5, 7] """ v = self._val o = 0 # XXX: This is a bit slow while v: if v & 1: yield o v = v >> 1 o = o + 1 def update(self, to_add): """Add all the values from the sequence or intset to_add""" if isinstance(to_add, IntSet): self._val |= to_add._val else: for i in to_add: self._val |= (1L << i) def add(self, to_add): self._val |= (1L << to_add) def remove(self, to_remove): """Remove one value from the set. Raises KeyError if the value is not present. >>> a = IntSet([10]) >>> a.remove(9) Traceback (most recent call last): File "/usr/lib/python2.4/doctest.py", line 1243, in __run compileflags, 1) in test.globs File "", line 1, in ? a.remove(9) KeyError: 9 >>> a.remove(10) >>> not a True """ m = 1L << to_remove if not self._val & m: raise KeyError(to_remove) self._val ^= m def set_remove(self, to_remove): """Remove all values that exist in to_remove. 
>>> a = IntSet(range(10)) >>> b = IntSet([2,3,4,7,12]) >>> a.set_remove(b) >>> list(a) [0, 1, 5, 6, 8, 9] >>> a.set_remove([1,2,5]) >>> list(a) [0, 6, 8, 9] """ if not isinstance(to_remove, IntSet): self.set_remove(IntSet(to_remove)) return intersect = self._val & to_remove._val self._val ^= intersect bzr-2.7.0/bzrlib/inventory.py0000644000000000000000000027053311730054002014340 0ustar 00000000000000# Copyright (C) 2005-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # FIXME: This refactoring of the workingtree code doesn't seem to keep # the WorkingTree's copy of the inventory in sync with the branch. The # branch modifies its working inventory when it does a commit to make # missing files permanently removed. # TODO: Maybe also keep the full path of the entry, and the children? # But those depend on its position within a particular inventory, and # it would be nice not to need to hold the backpointer here. from __future__ import absolute_import # This should really be an id randomly assigned when the tree is # created, but it's not for now. ROOT_ID = "TREE_ROOT" from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ import collections import copy import re import tarfile from bzrlib import ( chk_map, errors, generate_ids, osutils, ) """) from bzrlib import ( lazy_regex, trace, ) from bzrlib.static_tuple import StaticTuple from bzrlib.symbol_versioning import ( deprecated_in, deprecated_method, ) class InventoryEntry(object): """Description of a versioned file. An InventoryEntry has the following fields, which are also present in the XML inventory-entry element: file_id name (within the parent directory) parent_id file_id of the parent directory, or ROOT_ID revision the revision_id in which this variation of this file was introduced. executable Indicates that this file should be executable on systems that support it. text_sha1 sha-1 of the text of the file text_size size in bytes of the text of the file (reading a version 4 tree created a text_id field.) >>> i = Inventory() >>> i.path2id('') 'TREE_ROOT' >>> i.add(InventoryDirectory('123', 'src', ROOT_ID)) InventoryDirectory('123', 'src', parent_id='TREE_ROOT', revision=None) >>> i.add(InventoryFile('2323', 'hello.c', parent_id='123')) InventoryFile('2323', 'hello.c', parent_id='123', sha1=None, len=None, revision=None) >>> shouldbe = {0: '', 1: 'src', 2: 'src/hello.c'} >>> for ix, j in enumerate(i.iter_entries()): ... print (j[0] == shouldbe[ix], j[1]) ... 
(True, InventoryDirectory('TREE_ROOT', u'', parent_id=None, revision=None)) (True, InventoryDirectory('123', 'src', parent_id='TREE_ROOT', revision=None)) (True, InventoryFile('2323', 'hello.c', parent_id='123', sha1=None, len=None, revision=None)) >>> i.add(InventoryFile('2324', 'bye.c', '123')) InventoryFile('2324', 'bye.c', parent_id='123', sha1=None, len=None, revision=None) >>> i.add(InventoryDirectory('2325', 'wibble', '123')) InventoryDirectory('2325', 'wibble', parent_id='123', revision=None) >>> i.path2id('src/wibble') '2325' >>> i.add(InventoryFile('2326', 'wibble.c', '2325')) InventoryFile('2326', 'wibble.c', parent_id='2325', sha1=None, len=None, revision=None) >>> i['2326'] InventoryFile('2326', 'wibble.c', parent_id='2325', sha1=None, len=None, revision=None) >>> for path, entry in i.iter_entries(): ... print path ... src src/bye.c src/hello.c src/wibble src/wibble/wibble.c >>> i.id2path('2326') 'src/wibble/wibble.c' """ # Constants returned by describe_change() # # TODO: These should probably move to some kind of FileChangeDescription # class; that's like what's inside a TreeDelta but we want to be able to # generate them just for one file at a time. RENAMED = 'renamed' MODIFIED_AND_RENAMED = 'modified and renamed' __slots__ = ['file_id', 'revision', 'parent_id', 'name'] # Attributes that all InventoryEntry instances are expected to have, but # that don't vary for all kinds of entry. (e.g. symlink_target is only # relevant to InventoryLink, so there's no reason to make every # InventoryFile instance allocate space to hold a value for it.) # Attributes that only vary for files: executable, text_sha1, text_size, # text_id executable = False text_sha1 = None text_size = None text_id = None # Attributes that only vary for symlinks: symlink_target symlink_target = None # Attributes that only vary for tree-references: reference_revision reference_revision = None def detect_changes(self, old_entry): """Return a (text_modified, meta_modified) from this to old_entry. _read_tree_state must have been called on self and old_entry prior to calling detect_changes. """ return False, False def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree, output_to, reverse=False): """Perform a diff between two entries of the same kind.""" def parent_candidates(self, previous_inventories): """Find possible per-file graph parents. This is currently defined by: - Select the last changed revision in the parent inventory. - Do deal with a short lived bug in bzr 0.8's development two entries that have the same last changed but different 'x' bit settings are changed in-place. """ # revision:ie mapping for each ie found in previous_inventories. candidates = {} # identify candidate head revision ids. for inv in previous_inventories: if inv.has_id(self.file_id): ie = inv[self.file_id] if ie.revision in candidates: # same revision value in two different inventories: # correct possible inconsistencies: # * there was a bug in revision updates with 'x' bit # support. try: if candidates[ie.revision].executable != ie.executable: candidates[ie.revision].executable = False ie.executable = False except AttributeError: pass else: # add this revision as a candidate. candidates[ie.revision] = ie return candidates def has_text(self): """Return true if the object this entry represents has textual data. Note that textual data includes binary content. Also note that all entries get weave files created for them. 
This attribute is primarily used when upgrading from old trees that did not have the weave index for all inventory entries. """ return False def __init__(self, file_id, name, parent_id): """Create an InventoryEntry The filename must be a single component, relative to the parent directory; it cannot be a whole path or relative name. >>> e = InventoryFile('123', 'hello.c', ROOT_ID) >>> e.name 'hello.c' >>> e.file_id '123' >>> e = InventoryFile('123', 'src/hello.c', ROOT_ID) Traceback (most recent call last): InvalidEntryName: Invalid entry name: src/hello.c """ if '/' in name or '\\' in name: raise errors.InvalidEntryName(name=name) self.file_id = file_id self.revision = None self.name = name self.parent_id = parent_id def kind_character(self): """Return a short kind indicator useful for appending to names.""" raise errors.BzrError('unknown kind %r' % self.kind) known_kinds = ('file', 'directory', 'symlink') def sorted_children(self): return sorted(self.children.items()) @staticmethod def versionable_kind(kind): return (kind in ('file', 'directory', 'symlink', 'tree-reference')) def check(self, checker, rev_id, inv): """Check this inventory entry is intact. This is a template method, override _check for kind specific tests. :param checker: Check object providing context for the checks; can be used to find out what parts of the repository have already been checked. :param rev_id: Revision id from which this InventoryEntry was loaded. Not necessarily the last-changed revision for this file. :param inv: Inventory from which the entry was loaded. """ if self.parent_id is not None: if not inv.has_id(self.parent_id): raise errors.BzrCheckError( 'missing parent {%s} in inventory for revision {%s}' % ( self.parent_id, rev_id)) checker._add_entry_to_text_key_references(inv, self) self._check(checker, rev_id) def _check(self, checker, rev_id): """Check this inventory entry for kind specific errors.""" checker._report_items.append( 'unknown entry kind %r in revision {%s}' % (self.kind, rev_id)) def copy(self): """Clone this inventory entry.""" raise NotImplementedError @staticmethod def describe_change(old_entry, new_entry): """Describe the change between old_entry and this. This smells of being an InterInventoryEntry situation, but as its the first one, we're making it a static method for now. An entry with a different parent, or different name is considered to be renamed. Reparenting is an internal detail. Note that renaming the parent does not trigger a rename for the child entry itself. """ # TODO: Perhaps return an object rather than just a string if old_entry is new_entry: # also the case of both being None return 'unchanged' elif old_entry is None: return 'added' elif new_entry is None: return 'removed' if old_entry.kind != new_entry.kind: return 'modified' text_modified, meta_modified = new_entry.detect_changes(old_entry) if text_modified or meta_modified: modified = True else: modified = False # TODO 20060511 (mbp, rbc) factor out 'detect_rename' here. 
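# --- Editorial sketch, not part of bzrlib ---
# The classification logic below combines the rename check with
# detect_changes(): identical content under a new name is 'renamed', changed
# content under a new name is 'modified and renamed'.  The _example_* helper
# and the ids are ours, purely for illustration:
def _example_describe_change():
    from bzrlib.inventory import InventoryEntry, InventoryFile
    old = InventoryFile('f-id', 'a.c', 'root-id')
    old.text_sha1 = 'x' * 40
    new = InventoryFile('f-id', 'b.c', 'root-id')   # same text, new name
    new.text_sha1 = 'x' * 40
    print InventoryEntry.describe_change(old, new)  # -> renamed
    new.text_sha1 = 'y' * 40                        # text changed as well
    print InventoryEntry.describe_change(old, new)  # -> modified and renamed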
if old_entry.parent_id != new_entry.parent_id: renamed = True elif old_entry.name != new_entry.name: renamed = True else: renamed = False if renamed and not modified: return InventoryEntry.RENAMED if modified and not renamed: return 'modified' if modified and renamed: return InventoryEntry.MODIFIED_AND_RENAMED return 'unchanged' def __repr__(self): return ("%s(%r, %r, parent_id=%r, revision=%r)" % (self.__class__.__name__, self.file_id, self.name, self.parent_id, self.revision)) def __eq__(self, other): if other is self: # For the case when objects are cached return True if not isinstance(other, InventoryEntry): return NotImplemented return ((self.file_id == other.file_id) and (self.name == other.name) and (other.symlink_target == self.symlink_target) and (self.text_sha1 == other.text_sha1) and (self.text_size == other.text_size) and (self.text_id == other.text_id) and (self.parent_id == other.parent_id) and (self.kind == other.kind) and (self.revision == other.revision) and (self.executable == other.executable) and (self.reference_revision == other.reference_revision) ) def __ne__(self, other): return not (self == other) def __hash__(self): raise ValueError('not hashable') def _unchanged(self, previous_ie): """Has this entry changed relative to previous_ie. This method should be overridden in child classes. """ compatible = True # different inv parent if previous_ie.parent_id != self.parent_id: compatible = False # renamed elif previous_ie.name != self.name: compatible = False elif previous_ie.kind != self.kind: compatible = False return compatible def _read_tree_state(self, path, work_tree): """Populate fields in the inventory entry from the given tree. Note that this should be modified to be a noop on virtual trees as all entries created there are prepopulated. """ # TODO: Rather than running this manually, we should check the # working sha1 and other expensive properties when they're # first requested, or preload them if they're already known pass # nothing to do by default def _forget_tree_state(self): pass class InventoryDirectory(InventoryEntry): """A directory in an inventory.""" __slots__ = ['children'] kind = 'directory' def _check(self, checker, rev_id): """See InventoryEntry._check""" # In non rich root repositories we do not expect a file graph for the # root. if self.name == '' and not checker.rich_roots: return # Directories are stored as an empty file, but the file should exist # to provide a per-fileid log. The hash of every directory content is # "da..." below (the sha1sum of ''). checker.add_pending_item(rev_id, ('texts', self.file_id, self.revision), 'text', 'da39a3ee5e6b4b0d3255bfef95601890afd80709') def copy(self): other = InventoryDirectory(self.file_id, self.name, self.parent_id) other.revision = self.revision # note that children are *not* copied; they're pulled across when # others are added return other def __init__(self, file_id, name, parent_id): super(InventoryDirectory, self).__init__(file_id, name, parent_id) self.children = {} def kind_character(self): """See InventoryEntry.kind_character.""" return '/' class InventoryFile(InventoryEntry): """A file in an inventory.""" __slots__ = ['text_sha1', 'text_size', 'text_id', 'executable'] kind = 'file' def __init__(self, file_id, name, parent_id): super(InventoryFile, self).__init__(file_id, name, parent_id) self.text_sha1 = None self.text_size = None self.text_id = None self.executable = False def _check(self, checker, tree_revision_id): """See InventoryEntry._check""" # TODO: check size too. 
checker.add_pending_item(tree_revision_id, ('texts', self.file_id, self.revision), 'text', self.text_sha1) if self.text_size is None: checker._report_items.append( 'fileid {%s} in {%s} has None for text_size' % (self.file_id, tree_revision_id)) def copy(self): other = InventoryFile(self.file_id, self.name, self.parent_id) other.executable = self.executable other.text_id = self.text_id other.text_sha1 = self.text_sha1 other.text_size = self.text_size other.revision = self.revision return other def detect_changes(self, old_entry): """See InventoryEntry.detect_changes.""" text_modified = (self.text_sha1 != old_entry.text_sha1) meta_modified = (self.executable != old_entry.executable) return text_modified, meta_modified def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree, output_to, reverse=False): """See InventoryEntry._diff.""" from bzrlib.diff import DiffText from_file_id = self.file_id if to_entry: to_file_id = to_entry.file_id else: to_file_id = None if reverse: to_file_id, from_file_id = from_file_id, to_file_id tree, to_tree = to_tree, tree from_label, to_label = to_label, from_label differ = DiffText(tree, to_tree, output_to, 'utf-8', '', '', text_diff) return differ.diff_text(from_file_id, to_file_id, from_label, to_label) def has_text(self): """See InventoryEntry.has_text.""" return True def kind_character(self): """See InventoryEntry.kind_character.""" return '' def _read_tree_state(self, path, work_tree): """See InventoryEntry._read_tree_state.""" self.text_sha1 = work_tree.get_file_sha1(self.file_id, path=path) # FIXME: 20050930 probe for the text size when getting sha1 # in _read_tree_state self.executable = work_tree.is_executable(self.file_id, path=path) def __repr__(self): return ("%s(%r, %r, parent_id=%r, sha1=%r, len=%s, revision=%s)" % (self.__class__.__name__, self.file_id, self.name, self.parent_id, self.text_sha1, self.text_size, self.revision)) def _forget_tree_state(self): self.text_sha1 = None def _unchanged(self, previous_ie): """See InventoryEntry._unchanged.""" compatible = super(InventoryFile, self)._unchanged(previous_ie) if self.text_sha1 != previous_ie.text_sha1: compatible = False else: # FIXME: 20050930 probe for the text size when getting sha1 # in _read_tree_state self.text_size = previous_ie.text_size if self.executable != previous_ie.executable: compatible = False return compatible class InventoryLink(InventoryEntry): """A file in an inventory.""" __slots__ = ['symlink_target'] kind = 'symlink' def __init__(self, file_id, name, parent_id): super(InventoryLink, self).__init__(file_id, name, parent_id) self.symlink_target = None def _check(self, checker, tree_revision_id): """See InventoryEntry._check""" if self.symlink_target is None: checker._report_items.append( 'symlink {%s} has no target in revision {%s}' % (self.file_id, tree_revision_id)) # Symlinks are stored as '' checker.add_pending_item(tree_revision_id, ('texts', self.file_id, self.revision), 'text', 'da39a3ee5e6b4b0d3255bfef95601890afd80709') def copy(self): other = InventoryLink(self.file_id, self.name, self.parent_id) other.symlink_target = self.symlink_target other.revision = self.revision return other def detect_changes(self, old_entry): """See InventoryEntry.detect_changes.""" # FIXME: which _modified field should we use ? 
RBC 20051003 text_modified = (self.symlink_target != old_entry.symlink_target) if text_modified: trace.mutter(" symlink target changed") meta_modified = False return text_modified, meta_modified def _diff(self, text_diff, from_label, tree, to_label, to_entry, to_tree, output_to, reverse=False): """See InventoryEntry._diff.""" from bzrlib.diff import DiffSymlink old_target = self.symlink_target if to_entry is not None: new_target = to_entry.symlink_target else: new_target = None if not reverse: old_tree = tree new_tree = to_tree else: old_tree = to_tree new_tree = tree new_target, old_target = old_target, new_target differ = DiffSymlink(old_tree, new_tree, output_to) return differ.diff_symlink(old_target, new_target) def kind_character(self): """See InventoryEntry.kind_character.""" return '' def _read_tree_state(self, path, work_tree): """See InventoryEntry._read_tree_state.""" self.symlink_target = work_tree.get_symlink_target(self.file_id) def _forget_tree_state(self): self.symlink_target = None def _unchanged(self, previous_ie): """See InventoryEntry._unchanged.""" compatible = super(InventoryLink, self)._unchanged(previous_ie) if self.symlink_target != previous_ie.symlink_target: compatible = False return compatible class TreeReference(InventoryEntry): __slots__ = ['reference_revision'] kind = 'tree-reference' def __init__(self, file_id, name, parent_id, revision=None, reference_revision=None): InventoryEntry.__init__(self, file_id, name, parent_id) self.revision = revision self.reference_revision = reference_revision def copy(self): return TreeReference(self.file_id, self.name, self.parent_id, self.revision, self.reference_revision) def _read_tree_state(self, path, work_tree): """Populate fields in the inventory entry from the given tree. """ self.reference_revision = work_tree.get_reference_revision( self.file_id, path) def _forget_tree_state(self): self.reference_revision = None def _unchanged(self, previous_ie): """See InventoryEntry._unchanged.""" compatible = super(TreeReference, self)._unchanged(previous_ie) if self.reference_revision != previous_ie.reference_revision: compatible = False return compatible class CommonInventory(object): """Basic inventory logic, defined in terms of primitives like has_id. An inventory is the metadata about the contents of a tree. This is broadly a map from file_id to entries such as directories, files, symlinks and tree references. Each entry maintains its own metadata like SHA1 and length for files, or children for a directory. Entries can be looked up either by path or by file_id. InventoryEntry objects must not be modified after they are inserted, other than through the Inventory API. """ def has_filename(self, filename): return bool(self.path2id(filename)) def id2path(self, file_id): """Return as a string the path to file_id. >>> i = Inventory() >>> e = i.add(InventoryDirectory('src-id', 'src', ROOT_ID)) >>> e = i.add(InventoryFile('foo-id', 'foo.c', parent_id='src-id')) >>> print i.id2path('foo-id') src/foo.c :raises NoSuchId: If file_id is not present in the inventory. """ # get all names, skipping root return '/'.join(reversed( [parent.name for parent in self._iter_file_id_parents(file_id)][:-1])) def iter_entries(self, from_dir=None, recursive=True): """Return (path, entry) pairs, in order by name. 
:param from_dir: if None, start from the root, otherwise start from this directory (either file-id or entry) :param recursive: recurse into directories or not """ if from_dir is None: if self.root is None: return from_dir = self.root yield '', self.root elif isinstance(from_dir, basestring): from_dir = self[from_dir] # unrolling the recursive called changed the time from # 440ms/663ms (inline/total) to 116ms/116ms children = from_dir.children.items() children.sort() if not recursive: for name, ie in children: yield name, ie return children = collections.deque(children) stack = [(u'', children)] while stack: from_dir_relpath, children = stack[-1] while children: name, ie = children.popleft() # we know that from_dir_relpath never ends in a slash # and 'f' doesn't begin with one, we can do a string op, rather # than the checks of pathjoin(), though this means that all paths # start with a slash path = from_dir_relpath + '/' + name yield path[1:], ie if ie.kind != 'directory': continue # But do this child first new_children = ie.children.items() new_children.sort() new_children = collections.deque(new_children) stack.append((path, new_children)) # Break out of inner loop, so that we start outer loop with child break else: # if we finished all children, pop it off the stack stack.pop() def _preload_cache(self): """Populate any caches, we are about to access all items. The default implementation does nothing, because CommonInventory doesn't have a cache. """ pass def iter_entries_by_dir(self, from_dir=None, specific_file_ids=None, yield_parents=False): """Iterate over the entries in a directory first order. This returns all entries for a directory before returning the entries for children of a directory. This is not lexicographically sorted order, and is a hybrid between depth-first and breadth-first. :param yield_parents: If True, yield the parents from the root leading down to specific_file_ids that have been requested. This has no impact if specific_file_ids is None. :return: This yields (path, entry) pairs """ if specific_file_ids and not isinstance(specific_file_ids, set): specific_file_ids = set(specific_file_ids) # TODO? Perhaps this should return the from_dir so that the root is # yielded? or maybe an option? if from_dir is None and specific_file_ids is None: # They are iterating from the root, and have not specified any # specific entries to look at. All current callers fully consume the # iterator, so we can safely assume we are accessing all entries self._preload_cache() if from_dir is None: if self.root is None: return # Optimize a common case if (not yield_parents and specific_file_ids is not None and len(specific_file_ids) == 1): file_id = list(specific_file_ids)[0] if self.has_id(file_id): yield self.id2path(file_id), self[file_id] return from_dir = self.root if (specific_file_ids is None or yield_parents or self.root.file_id in specific_file_ids): yield u'', self.root elif isinstance(from_dir, basestring): from_dir = self[from_dir] if specific_file_ids is not None: # TODO: jam 20070302 This could really be done as a loop rather # than a bunch of recursive calls. 
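# --- Editorial sketch, not part of bzrlib ---
# iter_entries_by_dir() yields every entry of a directory before descending
# into any subdirectory.  A small worked example; the _example_* helper and
# the file ids are ours:
def _example_iter_entries_by_dir():
    from bzrlib.inventory import (
        Inventory, InventoryDirectory, InventoryFile, ROOT_ID)
    inv = Inventory()
    inv.add(InventoryDirectory('src-id', 'src', ROOT_ID))
    inv.add(InventoryFile('foo-id', 'foo.c', 'src-id'))
    inv.add(InventoryFile('readme-id', 'README', ROOT_ID))
    print [path for path, ie in inv.iter_entries_by_dir()]
    # -> [u'', u'README', u'src', u'src/foo.c']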
parents = set() byid = self def add_ancestors(file_id): if not byid.has_id(file_id): return parent_id = byid[file_id].parent_id if parent_id is None: return if parent_id not in parents: parents.add(parent_id) add_ancestors(parent_id) for file_id in specific_file_ids: add_ancestors(file_id) else: parents = None stack = [(u'', from_dir)] while stack: cur_relpath, cur_dir = stack.pop() child_dirs = [] for child_name, child_ie in sorted(cur_dir.children.iteritems()): child_relpath = cur_relpath + child_name if (specific_file_ids is None or child_ie.file_id in specific_file_ids or (yield_parents and child_ie.file_id in parents)): yield child_relpath, child_ie if child_ie.kind == 'directory': if parents is None or child_ie.file_id in parents: child_dirs.append((child_relpath+'/', child_ie)) stack.extend(reversed(child_dirs)) def _make_delta(self, old): """Make an inventory delta from two inventories.""" old_ids = set(old) new_ids = set(self) adds = new_ids - old_ids deletes = old_ids - new_ids common = old_ids.intersection(new_ids) delta = [] for file_id in deletes: delta.append((old.id2path(file_id), None, file_id, None)) for file_id in adds: delta.append((None, self.id2path(file_id), file_id, self[file_id])) for file_id in common: if old[file_id] != self[file_id]: delta.append((old.id2path(file_id), self.id2path(file_id), file_id, self[file_id])) return delta def make_entry(self, kind, name, parent_id, file_id=None): """Simple thunk to bzrlib.inventory.make_entry.""" return make_entry(kind, name, parent_id, file_id) def entries(self): """Return list of (path, ie) for all entries except the root. This may be faster than iter_entries. """ accum = [] def descend(dir_ie, dir_path): kids = dir_ie.children.items() kids.sort() for name, ie in kids: child_path = osutils.pathjoin(dir_path, name) accum.append((child_path, ie)) if ie.kind == 'directory': descend(ie, child_path) if self.root is not None: descend(self.root, u'') return accum def path2id(self, relpath): """Walk down through directories to return entry of last component. :param relpath: may be either a list of path components, or a single string, in which case it is automatically split. This returns the entry of the last component in the path, which may be either a file or a directory. Returns None IFF the path is not found. """ if isinstance(relpath, basestring): names = osutils.splitpath(relpath) else: names = relpath try: parent = self.root except errors.NoSuchId: # root doesn't exist yet so nothing else can return None if parent is None: return None for f in names: try: children = getattr(parent, 'children', None) if children is None: return None cie = children[f] parent = cie except KeyError: # or raise an error? return None return parent.file_id def filter(self, specific_fileids): """Get an inventory view filtered against a set of file-ids. Children of directories and parents are included. The result may or may not reference the underlying inventory so it should be treated as immutable. 
""" interesting_parents = set() for fileid in specific_fileids: try: interesting_parents.update(self.get_idpath(fileid)) except errors.NoSuchId: # This fileid is not in the inventory - that's ok pass entries = self.iter_entries() if self.root is None: return Inventory(root_id=None) other = Inventory(entries.next()[1].file_id) other.root.revision = self.root.revision other.revision_id = self.revision_id directories_to_expand = set() for path, entry in entries: file_id = entry.file_id if (file_id in specific_fileids or entry.parent_id in directories_to_expand): if entry.kind == 'directory': directories_to_expand.add(file_id) elif file_id not in interesting_parents: continue other.add(entry.copy()) return other def get_idpath(self, file_id): """Return a list of file_ids for the path to an entry. The list contains one element for each directory followed by the id of the file itself. So the length of the returned list is equal to the depth of the file in the tree, counting the root directory as depth 1. """ p = [] for parent in self._iter_file_id_parents(file_id): p.insert(0, parent.file_id) return p class Inventory(CommonInventory): """Mutable dict based in-memory inventory. We never store the full path to a file, because renaming a directory implicitly moves all of its contents. This class internally maintains a lookup tree that allows the children under a directory to be returned quickly. >>> inv = Inventory() >>> inv.add(InventoryFile('123-123', 'hello.c', ROOT_ID)) InventoryFile('123-123', 'hello.c', parent_id='TREE_ROOT', sha1=None, len=None, revision=None) >>> inv['123-123'].name 'hello.c' Id's may be looked up from paths: >>> inv.path2id('hello.c') '123-123' >>> inv.has_id('123-123') True There are iterators over the contents: >>> [entry[0] for entry in inv.iter_entries()] ['', u'hello.c'] """ def __init__(self, root_id=ROOT_ID, revision_id=None): """Create or read an inventory. If a working directory is specified, the inventory is read from there. If the file is specified, read from that. If not, the inventory is created empty. The inventory is created with a default root directory, with an id of None. """ if root_id is not None: self._set_root(InventoryDirectory(root_id, u'', None)) else: self.root = None self._byid = {} self.revision_id = revision_id def __repr__(self): # More than one page of ouput is not useful anymore to debug max_len = 2048 closing = '...}' contents = repr(self._byid) if len(contents) > max_len: contents = contents[:(max_len-len(closing))] + closing return "" % (id(self), contents) def apply_delta(self, delta): """Apply a delta to this inventory. See the inventory developers documentation for the theory behind inventory deltas. If delta application fails the inventory is left in an indeterminate state and must not be used. :param delta: A list of changes to apply. After all the changes are applied the final inventory must be internally consistent, but it is ok to supply changes which, if only half-applied would have an invalid result - such as supplying two changes which rename two files, 'A' and 'B' with each other : [('A', 'B', 'A-id', a_entry), ('B', 'A', 'B-id', b_entry)]. Each change is a tuple, of the form (old_path, new_path, file_id, new_entry). When new_path is None, the change indicates the removal of an entry from the inventory and new_entry will be ignored (using None is appropriate). 
If new_path is not None, then new_entry must be an InventoryEntry instance, which will be incorporated into the inventory (and replace any existing entry with the same file id). When old_path is None, the change indicates the addition of a new entry to the inventory. When neither new_path nor old_path are None, the change is a modification to an entry, such as a rename, reparent, kind change etc. The children attribute of new_entry is ignored. This is because this method preserves children automatically across alterations to the parent of the children, and cases where the parent id of a child is changing require the child to be passed in as a separate change regardless. E.g. in the recursive deletion of a directory - the directory's children must be included in the delta, or the final inventory will be invalid. Note that a file_id must only appear once within a given delta. An AssertionError is raised otherwise. """ # Check that the delta is legal. It would be nice if this could be # done within the loops below but it's safer to validate the delta # before starting to mutate the inventory, as there isn't a rollback # facility. list(_check_delta_unique_ids(_check_delta_unique_new_paths( _check_delta_unique_old_paths(_check_delta_ids_match_entry( _check_delta_ids_are_valid( _check_delta_new_path_entry_both_or_None( delta))))))) children = {} # Remove all affected items which were in the original inventory, # starting with the longest paths, thus ensuring parents are examined # after their children, which means that everything we examine has no # modified children remaining by the time we examine it. for old_path, file_id in sorted(((op, f) for op, np, f, e in delta if op is not None), reverse=True): # Preserve unaltered children of file_id for later reinsertion. file_id_children = getattr(self[file_id], 'children', {}) if len(file_id_children): children[file_id] = file_id_children if self.id2path(file_id) != old_path: raise errors.InconsistentDelta(old_path, file_id, "Entry was at wrong other path %r." % self.id2path(file_id)) # Remove file_id and the unaltered children. If file_id is not # being deleted it will be reinserted back later. self.remove_recursive_id(file_id) # Insert all affected which should be in the new inventory, reattaching # their children if they had any. This is done from shortest path to # longest, ensuring that items which were modified and whose parents in # the resulting inventory were also modified, are inserted after their # parents. for new_path, f, new_entry in sorted((np, f, e) for op, np, f, e in delta if np is not None): if new_entry.kind == 'directory': # Pop the child which to allow detection of children whose # parents were deleted and which were not reattached to a new # parent. 
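# --- Editorial sketch, not part of bzrlib ---
# A delta is a list of (old_path, new_path, file_id, new_entry) tuples; a
# simple rename applied to an in-memory Inventory looks like this (the
# _example_* helper and the ids are ours):
def _example_apply_delta():
    from bzrlib.inventory import Inventory, InventoryFile
    inv = Inventory('root-id')
    inv.add(InventoryFile('f-id', 'old.txt', 'root-id'))
    renamed = InventoryFile('f-id', 'new.txt', 'root-id')
    inv.apply_delta([('old.txt', 'new.txt', 'f-id', renamed)])
    print inv.id2path('f-id')   # -> new.txt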
replacement = InventoryDirectory(new_entry.file_id, new_entry.name, new_entry.parent_id) replacement.revision = new_entry.revision replacement.children = children.pop(replacement.file_id, {}) new_entry = replacement try: self.add(new_entry) except errors.DuplicateFileId: raise errors.InconsistentDelta(new_path, new_entry.file_id, "New id is already present in target.") except AttributeError: raise errors.InconsistentDelta(new_path, new_entry.file_id, "Parent is not a directory.") if self.id2path(new_entry.file_id) != new_path: raise errors.InconsistentDelta(new_path, new_entry.file_id, "New path is not consistent with parent path.") if len(children): # Get the parent id that was deleted parent_id, children = children.popitem() raise errors.InconsistentDelta("", parent_id, "The file id was deleted but its children were not deleted.") def create_by_apply_delta(self, inventory_delta, new_revision_id, propagate_caches=False): """See CHKInventory.create_by_apply_delta()""" new_inv = self.copy() new_inv.apply_delta(inventory_delta) new_inv.revision_id = new_revision_id return new_inv def _set_root(self, ie): self.root = ie self._byid = {self.root.file_id: self.root} def copy(self): # TODO: jam 20051218 Should copy also copy the revision_id? entries = self.iter_entries() if self.root is None: return Inventory(root_id=None) other = Inventory(entries.next()[1].file_id) other.root.revision = self.root.revision # copy recursively so we know directories will be added before # their children. There are more efficient ways than this... for path, entry in entries: other.add(entry.copy()) return other def __iter__(self): """Iterate over all file-ids.""" return iter(self._byid) def iter_just_entries(self): """Iterate over all entries. Unlike iter_entries(), just the entries are returned (not (path, ie)) and the order of entries is undefined. XXX: We may not want to merge this into bzr.dev. """ if self.root is None: return for _, ie in self._byid.iteritems(): yield ie def __len__(self): """Returns number of entries.""" return len(self._byid) def __getitem__(self, file_id): """Return the entry for given file_id. >>> inv = Inventory() >>> inv.add(InventoryFile('123123', 'hello.c', ROOT_ID)) InventoryFile('123123', 'hello.c', parent_id='TREE_ROOT', sha1=None, len=None, revision=None) >>> inv['123123'].name 'hello.c' """ try: return self._byid[file_id] except KeyError: # really we're passing an inventory, not a tree... raise errors.NoSuchId(self, file_id) def get_file_kind(self, file_id): return self._byid[file_id].kind def get_child(self, parent_id, filename): return self[parent_id].children.get(filename) def _add_child(self, entry): """Add an entry to the inventory, without adding it to its parent""" if entry.file_id in self._byid: raise errors.BzrError( "inventory already contains entry with id {%s}" % entry.file_id) self._byid[entry.file_id] = entry for child in getattr(entry, 'children', {}).itervalues(): self._add_child(child) return entry def add(self, entry): """Add entry to inventory. 
:return: entry """ if entry.file_id in self._byid: raise errors.DuplicateFileId(entry.file_id, self._byid[entry.file_id]) if entry.parent_id is None: self.root = entry else: try: parent = self._byid[entry.parent_id] except KeyError: raise errors.InconsistentDelta("", entry.parent_id, "Parent not in inventory.") if entry.name in parent.children: raise errors.InconsistentDelta( self.id2path(parent.children[entry.name].file_id), entry.file_id, "Path already versioned") parent.children[entry.name] = entry return self._add_child(entry) def add_path(self, relpath, kind, file_id=None, parent_id=None): """Add entry from a path. The immediate parent must already be versioned. Returns the new entry object.""" parts = osutils.splitpath(relpath) if len(parts) == 0: if file_id is None: file_id = generate_ids.gen_root_id() self.root = InventoryDirectory(file_id, '', None) self._byid = {self.root.file_id: self.root} return self.root else: parent_path = parts[:-1] parent_id = self.path2id(parent_path) if parent_id is None: raise errors.NotVersionedError(path=parent_path) ie = make_entry(kind, parts[-1], parent_id, file_id) return self.add(ie) def __delitem__(self, file_id): """Remove entry by id. >>> inv = Inventory() >>> inv.add(InventoryFile('123', 'foo.c', ROOT_ID)) InventoryFile('123', 'foo.c', parent_id='TREE_ROOT', sha1=None, len=None, revision=None) >>> inv.has_id('123') True >>> del inv['123'] >>> inv.has_id('123') False """ ie = self[file_id] del self._byid[file_id] if ie.parent_id is not None: del self[ie.parent_id].children[ie.name] def __eq__(self, other): """Compare two sets by comparing their contents. >>> i1 = Inventory() >>> i2 = Inventory() >>> i1 == i2 True >>> i1.add(InventoryFile('123', 'foo', ROOT_ID)) InventoryFile('123', 'foo', parent_id='TREE_ROOT', sha1=None, len=None, revision=None) >>> i1 == i2 False >>> i2.add(InventoryFile('123', 'foo', ROOT_ID)) InventoryFile('123', 'foo', parent_id='TREE_ROOT', sha1=None, len=None, revision=None) >>> i1 == i2 True """ if not isinstance(other, Inventory): return NotImplemented return self._byid == other._byid def __ne__(self, other): return not self.__eq__(other) def __hash__(self): raise ValueError('not hashable') def _iter_file_id_parents(self, file_id): """Yield the parents of file_id up to the root.""" while file_id is not None: try: ie = self._byid[file_id] except KeyError: raise errors.NoSuchId(tree=None, file_id=file_id) yield ie file_id = ie.parent_id def has_id(self, file_id): return (file_id in self._byid) def _make_delta(self, old): """Make an inventory delta from two inventories.""" old_getter = getattr(old, '_byid', old) new_getter = self._byid old_ids = set(old_getter) new_ids = set(new_getter) adds = new_ids - old_ids deletes = old_ids - new_ids if not adds and not deletes: common = new_ids else: common = old_ids.intersection(new_ids) delta = [] for file_id in deletes: delta.append((old.id2path(file_id), None, file_id, None)) for file_id in adds: delta.append((None, self.id2path(file_id), file_id, self[file_id])) for file_id in common: new_ie = new_getter[file_id] old_ie = old_getter[file_id] # If xml_serializer returns the cached InventoryEntries (rather # than always doing .copy()), inlining the 'is' check saves 2.7M # calls to __eq__. Under lsprof this saves 20s => 6s. # It is a minor improvement without lsprof. 
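# --- Editorial sketch, not part of bzrlib ---
# _make_delta() produces the same (old_path, new_path, file_id, entry) shape
# that apply_delta() consumes.  For a single added file (helper and ids are
# ours, purely for illustration):
def _example_make_delta():
    from bzrlib.inventory import Inventory, InventoryFile
    old_inv = Inventory('root-id')
    new_inv = Inventory('root-id')
    new_inv.add(InventoryFile('f-id', 'hello.c', 'root-id'))
    print new_inv._make_delta(old_inv)
    # -> one tuple: (None, 'hello.c', 'f-id', <the new InventoryFile entry>)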
if old_ie is new_ie or old_ie == new_ie: continue else: delta.append((old.id2path(file_id), self.id2path(file_id), file_id, new_ie)) return delta def remove_recursive_id(self, file_id): """Remove file_id, and children, from the inventory. :param file_id: A file_id to remove. """ to_find_delete = [self._byid[file_id]] to_delete = [] while to_find_delete: ie = to_find_delete.pop() to_delete.append(ie.file_id) if ie.kind == 'directory': to_find_delete.extend(ie.children.values()) for file_id in reversed(to_delete): ie = self[file_id] del self._byid[file_id] if ie.parent_id is not None: del self[ie.parent_id].children[ie.name] else: self.root = None def rename(self, file_id, new_parent_id, new_name): """Move a file within the inventory. This can change either the name, or the parent, or both. This does not move the working file. """ new_name = ensure_normalized_name(new_name) if not is_valid_name(new_name): raise errors.BzrError("not an acceptable filename: %r" % new_name) new_parent = self._byid[new_parent_id] if new_name in new_parent.children: raise errors.BzrError("%r already exists in %r" % (new_name, self.id2path(new_parent_id))) new_parent_idpath = self.get_idpath(new_parent_id) if file_id in new_parent_idpath: raise errors.BzrError( "cannot move directory %r into a subdirectory of itself, %r" % (self.id2path(file_id), self.id2path(new_parent_id))) file_ie = self._byid[file_id] old_parent = self._byid[file_ie.parent_id] # TODO: Don't leave things messed up if this fails del old_parent.children[file_ie.name] new_parent.children[new_name] = file_ie file_ie.name = new_name file_ie.parent_id = new_parent_id def is_root(self, file_id): return self.root is not None and file_id == self.root.file_id class CHKInventory(CommonInventory): """An inventory persisted in a CHK store. By design, a CHKInventory is immutable so many of the methods supported by Inventory - add, rename, apply_delta, etc - are *not* supported. To create a new CHKInventory, use create_by_apply_delta() or from_inventory(), say. Internally, a CHKInventory has one or two CHKMaps: * id_to_entry - a map from (file_id,) => InventoryEntry as bytes * parent_id_basename_to_file_id - a map from (parent_id, basename_utf8) => file_id as bytes The second map is optional and not present in early CHkRepository's. No caching is performed: every method call or item access will perform requests to the storage layer. As such, keep references to objects you want to reuse. """ def __init__(self, search_key_name): CommonInventory.__init__(self) self._fileid_to_entry_cache = {} self._fully_cached = False self._path_to_fileid_cache = {} self._search_key_name = search_key_name self.root_id = None def __eq__(self, other): """Compare two sets by comparing their contents.""" if not isinstance(other, CHKInventory): return NotImplemented this_key = self.id_to_entry.key() other_key = other.id_to_entry.key() this_pid_key = self.parent_id_basename_to_file_id.key() other_pid_key = other.parent_id_basename_to_file_id.key() if None in (this_key, this_pid_key, other_key, other_pid_key): return False return this_key == other_key and this_pid_key == other_pid_key def _entry_to_bytes(self, entry): """Serialise entry as a single bytestring. :param Entry: An inventory entry. :return: A bytestring for the entry. 
The BNF: ENTRY ::= FILE | DIR | SYMLINK | TREE FILE ::= "file: " COMMON SEP SHA SEP SIZE SEP EXECUTABLE DIR ::= "dir: " COMMON SYMLINK ::= "symlink: " COMMON SEP TARGET_UTF8 TREE ::= "tree: " COMMON REFERENCE_REVISION COMMON ::= FILE_ID SEP PARENT_ID SEP NAME_UTF8 SEP REVISION SEP ::= "\n" """ if entry.parent_id is not None: parent_str = entry.parent_id else: parent_str = '' name_str = entry.name.encode("utf8") if entry.kind == 'file': if entry.executable: exec_str = "Y" else: exec_str = "N" return "file: %s\n%s\n%s\n%s\n%s\n%d\n%s" % ( entry.file_id, parent_str, name_str, entry.revision, entry.text_sha1, entry.text_size, exec_str) elif entry.kind == 'directory': return "dir: %s\n%s\n%s\n%s" % ( entry.file_id, parent_str, name_str, entry.revision) elif entry.kind == 'symlink': return "symlink: %s\n%s\n%s\n%s\n%s" % ( entry.file_id, parent_str, name_str, entry.revision, entry.symlink_target.encode("utf8")) elif entry.kind == 'tree-reference': return "tree: %s\n%s\n%s\n%s\n%s" % ( entry.file_id, parent_str, name_str, entry.revision, entry.reference_revision) else: raise ValueError("unknown kind %r" % entry.kind) def _expand_fileids_to_parents_and_children(self, file_ids): """Give a more wholistic view starting with the given file_ids. For any file_id which maps to a directory, we will include all children of that directory. We will also include all directories which are parents of the given file_ids, but we will not include their children. eg: / # TREE_ROOT foo/ # foo-id baz # baz-id frob/ # frob-id fringle # fringle-id bar/ # bar-id bing # bing-id if given [foo-id] we will include TREE_ROOT as interesting parents and foo-id, baz-id, frob-id, fringle-id As interesting ids. """ interesting = set() # TODO: Pre-pass over the list of fileids to see if anything is already # deserialized in self._fileid_to_entry_cache directories_to_expand = set() children_of_parent_id = {} # It is okay if some of the fileids are missing for entry in self._getitems(file_ids): if entry.kind == 'directory': directories_to_expand.add(entry.file_id) interesting.add(entry.parent_id) children_of_parent_id.setdefault(entry.parent_id, set() ).add(entry.file_id) # Now, interesting has all of the direct parents, but not the # parents of those parents. 
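# ----------------------------------------------------------------------
# Editor's illustrative sketch (not part of the original bzrlib source).
# Following the BNF documented for _entry_to_bytes() above, a
# non-executable versioned file serialises to newline-separated fields
# (the ids, revision and sha1 below are invented example values):
#
#     file: example-file-id
#     TREE_ROOT
#     hello.c
#     example-revision-id
#     0123456789abcdef0123456789abcdef01234567
#     42
#     N
#
# _bytes_to_entry() (further down) reverses this by splitting on '\n'
# and dispatching on the "file: "/"dir: "/"symlink: "/"tree: " prefix.
# ----------------------------------------------------------------------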
It also may have some duplicates with # specific_fileids remaining_parents = interesting.difference(file_ids) # When we hit the TREE_ROOT, we'll get an interesting parent of None, # but we don't actually want to recurse into that interesting.add(None) # this will auto-filter it in the loop remaining_parents.discard(None) while remaining_parents: next_parents = set() for entry in self._getitems(remaining_parents): next_parents.add(entry.parent_id) children_of_parent_id.setdefault(entry.parent_id, set() ).add(entry.file_id) # Remove any search tips we've already processed remaining_parents = next_parents.difference(interesting) interesting.update(remaining_parents) # We should probably also .difference(directories_to_expand) interesting.update(file_ids) interesting.discard(None) while directories_to_expand: # Expand directories by looking in the # parent_id_basename_to_file_id map keys = [StaticTuple(f,).intern() for f in directories_to_expand] directories_to_expand = set() items = self.parent_id_basename_to_file_id.iteritems(keys) next_file_ids = set([item[1] for item in items]) next_file_ids = next_file_ids.difference(interesting) interesting.update(next_file_ids) for entry in self._getitems(next_file_ids): if entry.kind == 'directory': directories_to_expand.add(entry.file_id) children_of_parent_id.setdefault(entry.parent_id, set() ).add(entry.file_id) return interesting, children_of_parent_id def filter(self, specific_fileids): """Get an inventory view filtered against a set of file-ids. Children of directories and parents are included. The result may or may not reference the underlying inventory so it should be treated as immutable. """ (interesting, parent_to_children) = self._expand_fileids_to_parents_and_children( specific_fileids) # There is some overlap here, but we assume that all interesting items # are in the _fileid_to_entry_cache because we had to read them to # determine if they were a dir we wanted to recurse, or just a file # This should give us all the entries we'll want to add, so start # adding other = Inventory(self.root_id) other.root.revision = self.root.revision other.revision_id = self.revision_id if not interesting or not parent_to_children: # empty filter, or filtering entrys that don't exist # (if even 1 existed, then we would have populated # parent_to_children with at least the tree root.) return other cache = self._fileid_to_entry_cache remaining_children = collections.deque(parent_to_children[self.root_id]) while remaining_children: file_id = remaining_children.popleft() ie = cache[file_id] if ie.kind == 'directory': ie = ie.copy() # We create a copy to depopulate the .children attribute # TODO: depending on the uses of 'other' we should probably alwyas # '.copy()' to prevent someone from mutating other and # invaliding our internal cache other.add(ie) if file_id in parent_to_children: remaining_children.extend(parent_to_children[file_id]) return other @staticmethod def _bytes_to_utf8name_key(bytes): """Get the file_id, revision_id key out of bytes.""" # We don't normally care about name, except for times when we want # to filter out empty names because of non rich-root... 
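# ----------------------------------------------------------------------
# Editor's illustrative sketch (not part of the original bzrlib source).
# Re-using the example tree from the docstring of
# _expand_fileids_to_parents_and_children() above, filter() behaves
# roughly like this (ids are the invented ones from that docstring):
#
#     sub_inv = chk_inv.filter(['foo-id'])
#     # sub_inv now contains '' (the root), 'foo', 'foo/baz',
#     # 'foo/frob' and 'foo/frob/fringle',
#     # but neither 'bar' nor 'bar/bing'.
# ----------------------------------------------------------------------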
sections = bytes.split('\n') kind, file_id = sections[0].split(': ') return (sections[2], intern(file_id), intern(sections[3])) def _bytes_to_entry(self, bytes): """Deserialise a serialised entry.""" sections = bytes.split('\n') if sections[0].startswith("file: "): result = InventoryFile(sections[0][6:], sections[2].decode('utf8'), sections[1]) result.text_sha1 = sections[4] result.text_size = int(sections[5]) result.executable = sections[6] == "Y" elif sections[0].startswith("dir: "): result = CHKInventoryDirectory(sections[0][5:], sections[2].decode('utf8'), sections[1], self) elif sections[0].startswith("symlink: "): result = InventoryLink(sections[0][9:], sections[2].decode('utf8'), sections[1]) result.symlink_target = sections[4].decode('utf8') elif sections[0].startswith("tree: "): result = TreeReference(sections[0][6:], sections[2].decode('utf8'), sections[1]) result.reference_revision = sections[4] else: raise ValueError("Not a serialised entry %r" % bytes) result.file_id = intern(result.file_id) result.revision = intern(sections[3]) if result.parent_id == '': result.parent_id = None self._fileid_to_entry_cache[result.file_id] = result return result def create_by_apply_delta(self, inventory_delta, new_revision_id, propagate_caches=False): """Create a new CHKInventory by applying inventory_delta to this one. See the inventory developers documentation for the theory behind inventory deltas. :param inventory_delta: The inventory delta to apply. See Inventory.apply_delta for details. :param new_revision_id: The revision id of the resulting CHKInventory. :param propagate_caches: If True, the caches for this inventory are copied to and updated for the result. :return: The new CHKInventory. """ split = osutils.split result = CHKInventory(self._search_key_name) if propagate_caches: # Just propagate the path-to-fileid cache for now result._path_to_fileid_cache = dict(self._path_to_fileid_cache.iteritems()) search_key_func = chk_map.search_key_registry.get(self._search_key_name) self.id_to_entry._ensure_root() maximum_size = self.id_to_entry._root_node.maximum_size result.revision_id = new_revision_id result.id_to_entry = chk_map.CHKMap( self.id_to_entry._store, self.id_to_entry.key(), search_key_func=search_key_func) result.id_to_entry._ensure_root() result.id_to_entry._root_node.set_maximum_size(maximum_size) # Change to apply to the parent_id_basename delta. The dict maps # (parent_id, basename) -> (old_key, new_value). We use a dict because # when a path has its id replaced (e.g. the root is changed, or someone # does bzr mv a b, bzr mv c a, we should output a single change to this # map rather than two. parent_id_basename_delta = {} if self.parent_id_basename_to_file_id is not None: result.parent_id_basename_to_file_id = chk_map.CHKMap( self.parent_id_basename_to_file_id._store, self.parent_id_basename_to_file_id.key(), search_key_func=search_key_func) result.parent_id_basename_to_file_id._ensure_root() self.parent_id_basename_to_file_id._ensure_root() result_p_id_root = result.parent_id_basename_to_file_id._root_node p_id_root = self.parent_id_basename_to_file_id._root_node result_p_id_root.set_maximum_size(p_id_root.maximum_size) result_p_id_root._key_width = p_id_root._key_width else: result.parent_id_basename_to_file_id = None result.root_id = self.root_id id_to_entry_delta = [] # inventory_delta is only traversed once, so we just update the # variable. 
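# ----------------------------------------------------------------------
# Editor's illustrative sketch (not part of the original bzrlib source).
# A typical call to create_by_apply_delta(), with invented names:
#
#     new_inv = basis_inv.create_by_apply_delta(
#         [(None, 'hello.c', 'hello-id', new_file_entry)],  # one addition
#         'new-revision-id')
#
# basis_inv itself is left untouched; the checks that follow reject
# malformed deltas (repeated ids or paths, entries at the wrong path,
# missing parents, ...) by raising errors.InconsistentDelta.
# ----------------------------------------------------------------------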
# Check for repeated file ids inventory_delta = _check_delta_unique_ids(inventory_delta) # Repeated old paths inventory_delta = _check_delta_unique_old_paths(inventory_delta) # Check for repeated new paths inventory_delta = _check_delta_unique_new_paths(inventory_delta) # Check for entries that don't match the fileid inventory_delta = _check_delta_ids_match_entry(inventory_delta) # Check for nonsense fileids inventory_delta = _check_delta_ids_are_valid(inventory_delta) # Check for new_path <-> entry consistency inventory_delta = _check_delta_new_path_entry_both_or_None( inventory_delta) # All changed entries need to have their parents be directories and be # at the right path. This set contains (path, id) tuples. parents = set() # When we delete an item, all the children of it must be either deleted # or altered in their own right. As we batch process the change via # CHKMap.apply_delta, we build a set of things to use to validate the # delta. deletes = set() altered = set() for old_path, new_path, file_id, entry in inventory_delta: # file id changes if new_path == '': result.root_id = file_id if new_path is None: # Make a delete: new_key = None new_value = None # Update caches if propagate_caches: try: del result._path_to_fileid_cache[old_path] except KeyError: pass deletes.add(file_id) else: new_key = StaticTuple(file_id,) new_value = result._entry_to_bytes(entry) # Update caches. It's worth doing this whether # we're propagating the old caches or not. result._path_to_fileid_cache[new_path] = file_id parents.add((split(new_path)[0], entry.parent_id)) if old_path is None: old_key = None else: old_key = StaticTuple(file_id,) if self.id2path(file_id) != old_path: raise errors.InconsistentDelta(old_path, file_id, "Entry was at wrong other path %r." % self.id2path(file_id)) altered.add(file_id) id_to_entry_delta.append(StaticTuple(old_key, new_key, new_value)) if result.parent_id_basename_to_file_id is not None: # parent_id, basename changes if old_path is None: old_key = None else: old_entry = self[file_id] old_key = self._parent_id_basename_key(old_entry) if new_path is None: new_key = None new_value = None else: new_key = self._parent_id_basename_key(entry) new_value = file_id # If the two keys are the same, the value will be unchanged # as its always the file id for this entry. if old_key != new_key: # Transform a change into explicit delete/add preserving # a possible match on the key from a different file id. if old_key is not None: parent_id_basename_delta.setdefault( old_key, [None, None])[0] = old_key if new_key is not None: parent_id_basename_delta.setdefault( new_key, [None, None])[1] = new_value # validate that deletes are complete. for file_id in deletes: entry = self[file_id] if entry.kind != 'directory': continue # This loop could potentially be better by using the id_basename # map to just get the child file ids. for child in entry.children.values(): if child.file_id not in altered: raise errors.InconsistentDelta(self.id2path(child.file_id), child.file_id, "Child not deleted or reparented when " "parent deleted.") result.id_to_entry.apply_delta(id_to_entry_delta) if parent_id_basename_delta: # Transform the parent_id_basename delta data into a linear delta # with only one record for a given key. Optimally this would allow # re-keying, but its simpler to just output that as a delete+add # to spend less time calculating the delta. 
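# ----------------------------------------------------------------------
# Editor's illustrative sketch (not part of the original bzrlib source).
# The delete-completeness check above means that, with invented ids, a
# delta containing only
#     [('dir', None, 'dir-id', None)]
# is rejected with InconsistentDelta when 'dir' still has a child (say
# 'dir/child.c' with id 'child-id') that the delta neither deletes nor
# reparents; a valid delta must also carry a record for 'child-id'.
# ----------------------------------------------------------------------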
delta_list = [] for key, (old_key, value) in parent_id_basename_delta.iteritems(): if value is not None: delta_list.append((old_key, key, value)) else: delta_list.append((old_key, None, None)) result.parent_id_basename_to_file_id.apply_delta(delta_list) parents.discard(('', None)) for parent_path, parent in parents: try: if result[parent].kind != 'directory': raise errors.InconsistentDelta(result.id2path(parent), parent, 'Not a directory, but given children') except errors.NoSuchId: raise errors.InconsistentDelta("", parent, "Parent is not present in resulting inventory.") if result.path2id(parent_path) != parent: raise errors.InconsistentDelta(parent_path, parent, "Parent has wrong path %r." % result.path2id(parent_path)) return result @classmethod def deserialise(klass, chk_store, bytes, expected_revision_id): """Deserialise a CHKInventory. :param chk_store: A CHK capable VersionedFiles instance. :param bytes: The serialised bytes. :param expected_revision_id: The revision ID we think this inventory is for. :return: A CHKInventory """ lines = bytes.split('\n') if lines[-1] != '': raise AssertionError('bytes to deserialize must end with an eol') lines.pop() if lines[0] != 'chkinventory:': raise ValueError("not a serialised CHKInventory: %r" % bytes) info = {} allowed_keys = frozenset(['root_id', 'revision_id', 'search_key_name', 'parent_id_basename_to_file_id', 'id_to_entry']) for line in lines[1:]: key, value = line.split(': ', 1) if key not in allowed_keys: raise errors.BzrError('Unknown key in inventory: %r\n%r' % (key, bytes)) if key in info: raise errors.BzrError('Duplicate key in inventory: %r\n%r' % (key, bytes)) info[key] = value revision_id = intern(info['revision_id']) root_id = intern(info['root_id']) search_key_name = intern(info.get('search_key_name', 'plain')) parent_id_basename_to_file_id = intern(info.get( 'parent_id_basename_to_file_id', None)) if not parent_id_basename_to_file_id.startswith('sha1:'): raise ValueError('parent_id_basename_to_file_id should be a sha1' ' key not %r' % (parent_id_basename_to_file_id,)) id_to_entry = info['id_to_entry'] if not id_to_entry.startswith('sha1:'): raise ValueError('id_to_entry should be a sha1' ' key not %r' % (id_to_entry,)) result = CHKInventory(search_key_name) result.revision_id = revision_id result.root_id = root_id search_key_func = chk_map.search_key_registry.get( result._search_key_name) if parent_id_basename_to_file_id is not None: result.parent_id_basename_to_file_id = chk_map.CHKMap( chk_store, StaticTuple(parent_id_basename_to_file_id,), search_key_func=search_key_func) else: result.parent_id_basename_to_file_id = None result.id_to_entry = chk_map.CHKMap(chk_store, StaticTuple(id_to_entry,), search_key_func=search_key_func) if (result.revision_id,) != expected_revision_id: raise ValueError("Mismatched revision id and expected: %r, %r" % (result.revision_id, expected_revision_id)) return result @classmethod def from_inventory(klass, chk_store, inventory, maximum_size=0, search_key_name='plain'): """Create a CHKInventory from an existing inventory. The content of inventory is copied into the chk_store, and a CHKInventory referencing that is returned. :param chk_store: A CHK capable VersionedFiles instance. :param inventory: The inventory to copy. :param maximum_size: The CHKMap node size limit. 
:param search_key_name: The identifier for the search key function """ result = klass(search_key_name) result.revision_id = inventory.revision_id result.root_id = inventory.root.file_id entry_to_bytes = result._entry_to_bytes parent_id_basename_key = result._parent_id_basename_key id_to_entry_dict = {} parent_id_basename_dict = {} for path, entry in inventory.iter_entries(): key = StaticTuple(entry.file_id,).intern() id_to_entry_dict[key] = entry_to_bytes(entry) p_id_key = parent_id_basename_key(entry) parent_id_basename_dict[p_id_key] = entry.file_id result._populate_from_dicts(chk_store, id_to_entry_dict, parent_id_basename_dict, maximum_size=maximum_size) return result def _populate_from_dicts(self, chk_store, id_to_entry_dict, parent_id_basename_dict, maximum_size): search_key_func = chk_map.search_key_registry.get(self._search_key_name) root_key = chk_map.CHKMap.from_dict(chk_store, id_to_entry_dict, maximum_size=maximum_size, key_width=1, search_key_func=search_key_func) self.id_to_entry = chk_map.CHKMap(chk_store, root_key, search_key_func) root_key = chk_map.CHKMap.from_dict(chk_store, parent_id_basename_dict, maximum_size=maximum_size, key_width=2, search_key_func=search_key_func) self.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store, root_key, search_key_func) def _parent_id_basename_key(self, entry): """Create a key for a entry in a parent_id_basename_to_file_id index.""" if entry.parent_id is not None: parent_id = entry.parent_id else: parent_id = '' return StaticTuple(parent_id, entry.name.encode('utf8')).intern() def __getitem__(self, file_id): """map a single file_id -> InventoryEntry.""" if file_id is None: raise errors.NoSuchId(self, file_id) result = self._fileid_to_entry_cache.get(file_id, None) if result is not None: return result try: return self._bytes_to_entry( self.id_to_entry.iteritems([StaticTuple(file_id,)]).next()[1]) except StopIteration: # really we're passing an inventory, not a tree... raise errors.NoSuchId(self, file_id) def _getitems(self, file_ids): """Similar to __getitem__, but lets you query for multiple. The returned order is undefined. And currently if an item doesn't exist, it isn't included in the output. """ result = [] remaining = [] for file_id in file_ids: entry = self._fileid_to_entry_cache.get(file_id, None) if entry is None: remaining.append(file_id) else: result.append(entry) file_keys = [StaticTuple(f,).intern() for f in remaining] for file_key, value in self.id_to_entry.iteritems(file_keys): entry = self._bytes_to_entry(value) result.append(entry) self._fileid_to_entry_cache[entry.file_id] = entry return result def has_id(self, file_id): # Perhaps have an explicit 'contains' method on CHKMap ? if self._fileid_to_entry_cache.get(file_id, None) is not None: return True return len(list( self.id_to_entry.iteritems([StaticTuple(file_id,)]))) == 1 def is_root(self, file_id): return file_id == self.root_id def _iter_file_id_parents(self, file_id): """Yield the parents of file_id up to the root.""" while file_id is not None: try: ie = self[file_id] except KeyError: raise errors.NoSuchId(tree=self, file_id=file_id) yield ie file_id = ie.parent_id def __iter__(self): """Iterate over all file-ids.""" for key, _ in self.id_to_entry.iteritems(): yield key[-1] def iter_just_entries(self): """Iterate over all entries. Unlike iter_entries(), just the entries are returned (not (path, ie)) and the order of entries is undefined. XXX: We may not want to merge this into bzr.dev. 
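# ----------------------------------------------------------------------
# Editor's illustrative sketch (not part of the original bzrlib source).
# A plausible round trip through from_inventory(), to_lines() (defined
# further down) and deserialise() above; chk_store, plain_inv and the
# 4096 node size are invented for the example:
#
#     chk_inv = CHKInventory.from_inventory(chk_store, plain_inv,
#         maximum_size=4096, search_key_name='plain')
#     bytes = ''.join(chk_inv.to_lines())
#     copy = CHKInventory.deserialise(chk_store, bytes,
#         (chk_inv.revision_id,))   # expected id is a key tuple
# ----------------------------------------------------------------------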
""" for key, entry in self.id_to_entry.iteritems(): file_id = key[0] ie = self._fileid_to_entry_cache.get(file_id, None) if ie is None: ie = self._bytes_to_entry(entry) self._fileid_to_entry_cache[file_id] = ie yield ie def _preload_cache(self): """Make sure all file-ids are in _fileid_to_entry_cache""" if self._fully_cached: return # No need to do it again # The optimal sort order is to use iteritems() directly cache = self._fileid_to_entry_cache for key, entry in self.id_to_entry.iteritems(): file_id = key[0] if file_id not in cache: ie = self._bytes_to_entry(entry) cache[file_id] = ie else: ie = cache[file_id] last_parent_id = last_parent_ie = None pid_items = self.parent_id_basename_to_file_id.iteritems() for key, child_file_id in pid_items: if key == ('', ''): # This is the root if child_file_id != self.root_id: raise ValueError('Data inconsistency detected.' ' We expected data with key ("","") to match' ' the root id, but %s != %s' % (child_file_id, self.root_id)) continue parent_id, basename = key ie = cache[child_file_id] if parent_id == last_parent_id: parent_ie = last_parent_ie else: parent_ie = cache[parent_id] if parent_ie.kind != 'directory': raise ValueError('Data inconsistency detected.' ' An entry in the parent_id_basename_to_file_id map' ' has parent_id {%s} but the kind of that object' ' is %r not "directory"' % (parent_id, parent_ie.kind)) if parent_ie._children is None: parent_ie._children = {} basename = basename.decode('utf-8') if basename in parent_ie._children: existing_ie = parent_ie._children[basename] if existing_ie != ie: raise ValueError('Data inconsistency detected.' ' Two entries with basename %r were found' ' in the parent entry {%s}' % (basename, parent_id)) if basename != ie.name: raise ValueError('Data inconsistency detected.' ' In the parent_id_basename_to_file_id map, file_id' ' {%s} is listed as having basename %r, but in the' ' id_to_entry map it is %r' % (child_file_id, basename, ie.name)) parent_ie._children[basename] = ie self._fully_cached = True def iter_changes(self, basis): """Generate a Tree.iter_changes change list between this and basis. :param basis: Another CHKInventory. :return: An iterator over the changes between self and basis, as per tree.iter_changes(). 
""" # We want: (file_id, (path_in_source, path_in_target), # changed_content, versioned, parent, name, kind, # executable) for key, basis_value, self_value in \ self.id_to_entry.iter_changes(basis.id_to_entry): file_id = key[0] if basis_value is not None: basis_entry = basis._bytes_to_entry(basis_value) path_in_source = basis.id2path(file_id) basis_parent = basis_entry.parent_id basis_name = basis_entry.name basis_executable = basis_entry.executable else: path_in_source = None basis_parent = None basis_name = None basis_executable = None if self_value is not None: self_entry = self._bytes_to_entry(self_value) path_in_target = self.id2path(file_id) self_parent = self_entry.parent_id self_name = self_entry.name self_executable = self_entry.executable else: path_in_target = None self_parent = None self_name = None self_executable = None if basis_value is None: # add kind = (None, self_entry.kind) versioned = (False, True) elif self_value is None: # delete kind = (basis_entry.kind, None) versioned = (True, False) else: kind = (basis_entry.kind, self_entry.kind) versioned = (True, True) changed_content = False if kind[0] != kind[1]: changed_content = True elif kind[0] == 'file': if (self_entry.text_size != basis_entry.text_size or self_entry.text_sha1 != basis_entry.text_sha1): changed_content = True elif kind[0] == 'symlink': if self_entry.symlink_target != basis_entry.symlink_target: changed_content = True elif kind[0] == 'tree-reference': if (self_entry.reference_revision != basis_entry.reference_revision): changed_content = True parent = (basis_parent, self_parent) name = (basis_name, self_name) executable = (basis_executable, self_executable) if (not changed_content and parent[0] == parent[1] and name[0] == name[1] and executable[0] == executable[1]): # Could happen when only the revision changed for a directory # for instance. continue yield (file_id, (path_in_source, path_in_target), changed_content, versioned, parent, name, kind, executable) def __len__(self): """Return the number of entries in the inventory.""" return len(self.id_to_entry) def _make_delta(self, old): """Make an inventory delta from two inventories.""" if type(old) != CHKInventory: return CommonInventory._make_delta(self, old) delta = [] for key, old_value, self_value in \ self.id_to_entry.iter_changes(old.id_to_entry): file_id = key[0] if old_value is not None: old_path = old.id2path(file_id) else: old_path = None if self_value is not None: entry = self._bytes_to_entry(self_value) self._fileid_to_entry_cache[file_id] = entry new_path = self.id2path(file_id) else: entry = None new_path = None delta.append((old_path, new_path, file_id, entry)) return delta def path2id(self, relpath): """See CommonInventory.path2id().""" # TODO: perhaps support negative hits? 
if isinstance(relpath, basestring): names = osutils.splitpath(relpath) else: names = relpath if relpath == []: relpath = [""] relpath = osutils.pathjoin(*relpath) result = self._path_to_fileid_cache.get(relpath, None) if result is not None: return result current_id = self.root_id if current_id is None: return None parent_id_index = self.parent_id_basename_to_file_id cur_path = None for basename in names: if cur_path is None: cur_path = basename else: cur_path = cur_path + '/' + basename basename_utf8 = basename.encode('utf8') file_id = self._path_to_fileid_cache.get(cur_path, None) if file_id is None: key_filter = [StaticTuple(current_id, basename_utf8)] items = parent_id_index.iteritems(key_filter) for (parent_id, name_utf8), file_id in items: if parent_id != current_id or name_utf8 != basename_utf8: raise errors.BzrError("corrupt inventory lookup! " "%r %r %r %r" % (parent_id, current_id, name_utf8, basename_utf8)) if file_id is None: return None else: self._path_to_fileid_cache[cur_path] = file_id current_id = file_id return current_id def to_lines(self): """Serialise the inventory to lines.""" lines = ["chkinventory:\n"] if self._search_key_name != 'plain': # custom ordering grouping things that don't change together lines.append('search_key_name: %s\n' % (self._search_key_name,)) lines.append("root_id: %s\n" % self.root_id) lines.append('parent_id_basename_to_file_id: %s\n' % (self.parent_id_basename_to_file_id.key()[0],)) lines.append("revision_id: %s\n" % self.revision_id) lines.append("id_to_entry: %s\n" % (self.id_to_entry.key()[0],)) else: lines.append("revision_id: %s\n" % self.revision_id) lines.append("root_id: %s\n" % self.root_id) if self.parent_id_basename_to_file_id is not None: lines.append('parent_id_basename_to_file_id: %s\n' % (self.parent_id_basename_to_file_id.key()[0],)) lines.append("id_to_entry: %s\n" % (self.id_to_entry.key()[0],)) return lines @property def root(self): """Get the root entry.""" return self[self.root_id] class CHKInventoryDirectory(InventoryDirectory): """A directory in an inventory.""" __slots__ = ['_children', '_chk_inventory'] def __init__(self, file_id, name, parent_id, chk_inventory): # Don't call InventoryDirectory.__init__ - it isn't right for this # class. InventoryEntry.__init__(self, file_id, name, parent_id) self._children = None self._chk_inventory = chk_inventory @property def children(self): """Access the list of children of this directory. With a parent_id_basename_to_file_id index, loads all the children, without loads the entire index. Without is bad. A more sophisticated proxy object might be nice, to allow partial loading of children as well when specific names are accessed. (So path traversal can be written in the obvious way but not examine siblings.). """ if self._children is not None: return self._children # No longer supported if self._chk_inventory.parent_id_basename_to_file_id is None: raise AssertionError("Inventories without" " parent_id_basename_to_file_id are no longer supported") result = {} # XXX: Todo - use proxy objects for the children rather than loading # all when the attribute is referenced. 
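# ----------------------------------------------------------------------
# Editor's illustrative sketches (not part of the original bzrlib source)
# for two pieces of CHKInventory plumbing defined above.
#
# 1) path2id() resolves a path one name at a time through the
#    parent_id_basename_to_file_id map; for 'src/hello.c' (invented ids):
#        (root_id, 'src')      -> 'src-id'
#        ('src-id', 'hello.c') -> 'hello-id'
#    each hit being remembered in _path_to_fileid_cache.
#
# 2) to_lines() with the default 'plain' search key emits a header of
#    this shape (sha1 keys invented):
#        chkinventory:
#        revision_id: example-revision-id
#        root_id: TREE_ROOT
#        parent_id_basename_to_file_id: sha1:1234...
#        id_to_entry: sha1:abcd...
#    which is the layout deserialise() (earlier in the class) accepts,
#    plus an optional 'search_key_name:' line for other key functions.
# ----------------------------------------------------------------------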
parent_id_index = self._chk_inventory.parent_id_basename_to_file_id child_keys = set() for (parent_id, name_utf8), file_id in parent_id_index.iteritems( key_filter=[StaticTuple(self.file_id,)]): child_keys.add(StaticTuple(file_id,)) cached = set() for file_id_key in child_keys: entry = self._chk_inventory._fileid_to_entry_cache.get( file_id_key[0], None) if entry is not None: result[entry.name] = entry cached.add(file_id_key) child_keys.difference_update(cached) # populate; todo: do by name id_to_entry = self._chk_inventory.id_to_entry for file_id_key, bytes in id_to_entry.iteritems(child_keys): entry = self._chk_inventory._bytes_to_entry(bytes) result[entry.name] = entry self._chk_inventory._fileid_to_entry_cache[file_id_key[0]] = entry self._children = result return result entry_factory = { 'directory': InventoryDirectory, 'file': InventoryFile, 'symlink': InventoryLink, 'tree-reference': TreeReference } def make_entry(kind, name, parent_id, file_id=None): """Create an inventory entry. :param kind: the type of inventory entry to create. :param name: the basename of the entry. :param parent_id: the parent_id of the entry. :param file_id: the file_id to use. if None, one will be created. """ if file_id is None: file_id = generate_ids.gen_file_id(name) name = ensure_normalized_name(name) try: factory = entry_factory[kind] except KeyError: raise errors.BadFileKindError(name, kind) return factory(file_id, name, parent_id) def ensure_normalized_name(name): """Normalize name. :raises InvalidNormalization: When name is not normalized, and cannot be accessed on this platform by the normalized path. :return: The NFC normalised version of name. """ #------- This has been copied to bzrlib.dirstate.DirState.add, please # keep them synchronised. # we dont import normalized_filename directly because we want to be # able to change the implementation at runtime for tests. norm_name, can_access = osutils.normalized_filename(name) if norm_name != name: if can_access: return norm_name else: # TODO: jam 20060701 This would probably be more useful # if the error was raised with the full path raise errors.InvalidNormalization(name) return name _NAME_RE = lazy_regex.lazy_compile(r'^[^/\\]+$') def is_valid_name(name): return bool(_NAME_RE.match(name)) def _check_delta_unique_ids(delta): """Decorate a delta and check that the file ids in it are unique. :return: A generator over delta. """ ids = set() for item in delta: length = len(ids) + 1 ids.add(item[2]) if len(ids) != length: raise errors.InconsistentDelta(item[0] or item[1], item[2], "repeated file_id") yield item def _check_delta_unique_new_paths(delta): """Decorate a delta and check that the new paths in it are unique. :return: A generator over delta. """ paths = set() for item in delta: length = len(paths) + 1 path = item[1] if path is not None: paths.add(path) if len(paths) != length: raise errors.InconsistentDelta(path, item[2], "repeated path") yield item def _check_delta_unique_old_paths(delta): """Decorate a delta and check that the old paths in it are unique. :return: A generator over delta. """ paths = set() for item in delta: length = len(paths) + 1 path = item[0] if path is not None: paths.add(path) if len(paths) != length: raise errors.InconsistentDelta(path, item[2], "repeated path") yield item def _check_delta_ids_are_valid(delta): """Decorate a delta and check that the ids in it are valid. :return: A generator over delta. 
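# ----------------------------------------------------------------------
# Editor's illustrative sketch (not part of the original bzrlib source).
# make_entry() above is the usual way to build a new entry; with an
# invented parent id:
#
#     ie = make_entry('file', 'hello.c', 'src-id')
#     # an InventoryFile with a freshly generated file_id
#     ie = make_entry('symlink', 'latest', 'src-id', 'link-id')
#     # an InventoryLink reusing the supplied 'link-id'
#
# Unknown kinds raise errors.BadFileKindError.
# ----------------------------------------------------------------------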
""" for item in delta: entry = item[3] if item[2] is None: raise errors.InconsistentDelta(item[0] or item[1], item[2], "entry with file_id None %r" % entry) if type(item[2]) != str: raise errors.InconsistentDelta(item[0] or item[1], item[2], "entry with non bytes file_id %r" % entry) yield item def _check_delta_ids_match_entry(delta): """Decorate a delta and check that the ids in it match the entry.file_id. :return: A generator over delta. """ for item in delta: entry = item[3] if entry is not None: if entry.file_id != item[2]: raise errors.InconsistentDelta(item[0] or item[1], item[2], "mismatched id with %r" % entry) yield item def _check_delta_new_path_entry_both_or_None(delta): """Decorate a delta and check that the new_path and entry are paired. :return: A generator over delta. """ for item in delta: new_path = item[1] entry = item[3] if new_path is None and entry is not None: raise errors.InconsistentDelta(item[0], item[1], "Entry with no new_path") if new_path is not None and entry is None: raise errors.InconsistentDelta(new_path, item[1], "new_path with no entry") yield item def mutable_inventory_from_tree(tree): """Create a new inventory that has the same contents as a specified tree. :param tree: Revision tree to create inventory from """ entries = tree.iter_entries_by_dir() inv = Inventory(None, tree.get_revision_id()) for path, inv_entry in entries: inv.add(inv_entry.copy()) return inv bzr-2.7.0/bzrlib/inventory_delta.py0000644000000000000000000003543611673635356015541 0ustar 00000000000000# Copyright (C) 2008, 2009 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Inventory delta serialisation. See doc/developers/inventory.txt for the description of the format. In this module the interesting classes are: - InventoryDeltaSerializer - object to read/write inventory deltas. """ from __future__ import absolute_import __all__ = ['InventoryDeltaSerializer'] from bzrlib import errors from bzrlib.osutils import basename from bzrlib import inventory from bzrlib.revision import NULL_REVISION FORMAT_1 = 'bzr inventory delta v1 (bzr 1.14)' class InventoryDeltaError(errors.BzrError): """An error when serializing or deserializing an inventory delta.""" # Most errors when serializing and deserializing are due to bugs, although # damaged input (i.e. a bug in a different process) could cause # deserialization errors too. internal_error = True class IncompatibleInventoryDelta(errors.BzrError): """The delta could not be deserialised because its contents conflict with the allow_versioned_root or allow_tree_references flags of the deserializer. """ internal_error = False def _directory_content(entry): """Serialize the content component of entry which is a directory. :param entry: An InventoryDirectory. """ return "dir" def _file_content(entry): """Serialize the content component of entry which is a file. :param entry: An InventoryFile. 
""" if entry.executable: exec_bytes = 'Y' else: exec_bytes = '' size_exec_sha = (entry.text_size, exec_bytes, entry.text_sha1) if None in size_exec_sha: raise InventoryDeltaError('Missing size or sha for %s' % entry.file_id) return "file\x00%d\x00%s\x00%s" % size_exec_sha def _link_content(entry): """Serialize the content component of entry which is a symlink. :param entry: An InventoryLink. """ target = entry.symlink_target if target is None: raise InventoryDeltaError('Missing target for %s' % entry.file_id) return "link\x00%s" % target.encode('utf8') def _reference_content(entry): """Serialize the content component of entry which is a tree-reference. :param entry: A TreeReference. """ tree_revision = entry.reference_revision if tree_revision is None: raise InventoryDeltaError( 'Missing reference revision for %s' % entry.file_id) return "tree\x00%s" % tree_revision def _dir_to_entry(content, name, parent_id, file_id, last_modified, _type=inventory.InventoryDirectory): """Convert a dir content record to an InventoryDirectory.""" result = _type(file_id, name, parent_id) result.revision = last_modified return result def _file_to_entry(content, name, parent_id, file_id, last_modified, _type=inventory.InventoryFile): """Convert a dir content record to an InventoryFile.""" result = _type(file_id, name, parent_id) result.revision = last_modified result.text_size = int(content[1]) result.text_sha1 = content[3] if content[2]: result.executable = True else: result.executable = False return result def _link_to_entry(content, name, parent_id, file_id, last_modified, _type=inventory.InventoryLink): """Convert a link content record to an InventoryLink.""" result = _type(file_id, name, parent_id) result.revision = last_modified result.symlink_target = content[1].decode('utf8') return result def _tree_to_entry(content, name, parent_id, file_id, last_modified, _type=inventory.TreeReference): """Convert a tree content record to a TreeReference.""" result = _type(file_id, name, parent_id) result.revision = last_modified result.reference_revision = content[1] return result class InventoryDeltaSerializer(object): """Serialize inventory deltas.""" def __init__(self, versioned_root, tree_references): """Create an InventoryDeltaSerializer. :param versioned_root: If True, any root entry that is seen is expected to be versioned, and root entries can have any fileid. :param tree_references: If True support tree-reference entries. """ self._versioned_root = versioned_root self._tree_references = tree_references self._entry_to_content = { 'directory': _directory_content, 'file': _file_content, 'symlink': _link_content, } if tree_references: self._entry_to_content['tree-reference'] = _reference_content def delta_to_lines(self, old_name, new_name, delta_to_new): """Return a line sequence for delta_to_new. Both the versioned_root and tree_references flags must be set via require_flags before calling this. :param old_name: A UTF8 revision id for the old inventory. May be NULL_REVISION if there is no older inventory and delta_to_new includes the entire inventory contents. :param new_name: The version name of the inventory we create with this delta. :param delta_to_new: An inventory delta such as Inventory.apply_delta takes. :return: The serialized delta as lines. 
""" if type(old_name) is not str: raise TypeError('old_name should be str, got %r' % (old_name,)) if type(new_name) is not str: raise TypeError('new_name should be str, got %r' % (new_name,)) lines = ['', '', '', '', ''] to_line = self._delta_item_to_line for delta_item in delta_to_new: line = to_line(delta_item, new_name) if line.__class__ != str: raise InventoryDeltaError( 'to_line generated non-str output %r' % lines[-1]) lines.append(line) lines.sort() lines[0] = "format: %s\n" % FORMAT_1 lines[1] = "parent: %s\n" % old_name lines[2] = "version: %s\n" % new_name lines[3] = "versioned_root: %s\n" % self._serialize_bool( self._versioned_root) lines[4] = "tree_references: %s\n" % self._serialize_bool( self._tree_references) return lines def _serialize_bool(self, value): if value: return "true" else: return "false" def _delta_item_to_line(self, delta_item, new_version): """Convert delta_item to a line.""" oldpath, newpath, file_id, entry = delta_item if newpath is None: # delete oldpath_utf8 = '/' + oldpath.encode('utf8') newpath_utf8 = 'None' parent_id = '' last_modified = NULL_REVISION content = 'deleted\x00\x00' else: if oldpath is None: oldpath_utf8 = 'None' else: oldpath_utf8 = '/' + oldpath.encode('utf8') if newpath == '/': raise AssertionError( "Bad inventory delta: '/' is not a valid newpath " "(should be '') in delta item %r" % (delta_item,)) # TODO: Test real-world utf8 cache hit rate. It may be a win. newpath_utf8 = '/' + newpath.encode('utf8') # Serialize None as '' parent_id = entry.parent_id or '' # Serialize unknown revisions as NULL_REVISION last_modified = entry.revision # special cases for / if newpath_utf8 == '/' and not self._versioned_root: # This is an entry for the root, this inventory does not # support versioned roots. So this must be an unversioned # root, i.e. last_modified == new revision. Otherwise, this # delta is invalid. # Note: the non-rich-root repositories *can* have roots with # file-ids other than TREE_ROOT, e.g. repo formats that use the # xml5 serializer. if last_modified != new_version: raise InventoryDeltaError( 'Version present for / in %s (%s != %s)' % (file_id, last_modified, new_version)) if last_modified is None: raise InventoryDeltaError("no version for fileid %s" % file_id) content = self._entry_to_content[entry.kind](entry) return ("%s\x00%s\x00%s\x00%s\x00%s\x00%s\n" % (oldpath_utf8, newpath_utf8, file_id, parent_id, last_modified, content)) class InventoryDeltaDeserializer(object): """Deserialize inventory deltas.""" def __init__(self, allow_versioned_root=True, allow_tree_references=True): """Create an InventoryDeltaDeserializer. :param versioned_root: If True, any root entry that is seen is expected to be versioned, and root entries can have any fileid. :param tree_references: If True support tree-reference entries. """ self._allow_versioned_root = allow_versioned_root self._allow_tree_references = allow_tree_references def _deserialize_bool(self, value): if value == "true": return True elif value == "false": return False else: raise InventoryDeltaError("value %r is not a bool" % (value,)) def parse_text_bytes(self, bytes): """Parse the text bytes of a serialized inventory delta. If versioned_root and/or tree_references flags were set via require_flags, then the parsed flags must match or a BzrError will be raised. :param bytes: The bytes to parse. This can be obtained by calling delta_to_lines and then doing ''.join(delta_lines). 
:return: (parent_id, new_id, versioned_root, tree_references, inventory_delta) """ if bytes[-1:] != '\n': last_line = bytes.rsplit('\n', 1)[-1] raise InventoryDeltaError('last line not empty: %r' % (last_line,)) lines = bytes.split('\n')[:-1] # discard the last empty line if not lines or lines[0] != 'format: %s' % FORMAT_1: raise InventoryDeltaError('unknown format %r' % lines[0:1]) if len(lines) < 2 or not lines[1].startswith('parent: '): raise InventoryDeltaError('missing parent: marker') delta_parent_id = lines[1][8:] if len(lines) < 3 or not lines[2].startswith('version: '): raise InventoryDeltaError('missing version: marker') delta_version_id = lines[2][9:] if len(lines) < 4 or not lines[3].startswith('versioned_root: '): raise InventoryDeltaError('missing versioned_root: marker') delta_versioned_root = self._deserialize_bool(lines[3][16:]) if len(lines) < 5 or not lines[4].startswith('tree_references: '): raise InventoryDeltaError('missing tree_references: marker') delta_tree_references = self._deserialize_bool(lines[4][17:]) if (not self._allow_versioned_root and delta_versioned_root): raise IncompatibleInventoryDelta("versioned_root not allowed") result = [] seen_ids = set() line_iter = iter(lines) for i in range(5): line_iter.next() for line in line_iter: (oldpath_utf8, newpath_utf8, file_id, parent_id, last_modified, content) = line.split('\x00', 5) parent_id = parent_id or None if file_id in seen_ids: raise InventoryDeltaError( "duplicate file id in inventory delta %r" % lines) seen_ids.add(file_id) if (newpath_utf8 == '/' and not delta_versioned_root and last_modified != delta_version_id): # Delta claims to be not have a versioned root, yet here's # a root entry with a non-default version. raise InventoryDeltaError("Versioned root found: %r" % line) elif newpath_utf8 != 'None' and last_modified[-1] == ':': # Deletes have a last_modified of null:, but otherwise special # revision ids should not occur. 
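# ----------------------------------------------------------------------
# Editor's illustrative sketch (not part of the original bzrlib source).
# The text parsed here is what delta_to_lines() above produces: five
# header lines followed by one NUL-separated line per delta item (the
# item lines end up sorted).  With invented ids, revisions and sha1:
#
#     format: bzr inventory delta v1 (bzr 1.14)
#     parent: old-revision-id
#     version: new-revision-id
#     versioned_root: true
#     tree_references: false
#     /doomed.c\x00None\x00doomed-id\x00\x00null:\x00deleted\x00\x00
#     None\x00/hello.c\x00hello-id\x00TREE_ROOT\x00new-revision-id\x00file\x0011\x00\x00<sha1>
#
# The trailing content field is one of
#     'dir'
#     'file\x00<size>\x00<"Y" or "">\x00<sha1>'
#     'link\x00<target utf8>'
#     'tree\x00<reference revision id>'
#     'deleted\x00\x00'   (for removals)
# and ''.join(lines) from the serializer round-trips through
# parse_text_bytes() back to (parent_id, new_id, versioned_root,
# tree_references, inventory_delta).
# ----------------------------------------------------------------------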
raise InventoryDeltaError('special revisionid found: %r' % line) if content.startswith('tree\x00'): if delta_tree_references is False: raise InventoryDeltaError( "Tree reference found (but header said " "tree_references: false): %r" % line) elif not self._allow_tree_references: raise IncompatibleInventoryDelta( "Tree reference not allowed") if oldpath_utf8 == 'None': oldpath = None elif oldpath_utf8[:1] != '/': raise InventoryDeltaError( "oldpath invalid (does not start with /): %r" % (oldpath_utf8,)) else: oldpath_utf8 = oldpath_utf8[1:] oldpath = oldpath_utf8.decode('utf8') if newpath_utf8 == 'None': newpath = None elif newpath_utf8[:1] != '/': raise InventoryDeltaError( "newpath invalid (does not start with /): %r" % (newpath_utf8,)) else: # Trim leading slash newpath_utf8 = newpath_utf8[1:] newpath = newpath_utf8.decode('utf8') content_tuple = tuple(content.split('\x00')) if content_tuple[0] == 'deleted': entry = None else: entry = _parse_entry( newpath, file_id, parent_id, last_modified, content_tuple) delta_item = (oldpath, newpath, file_id, entry) result.append(delta_item) return (delta_parent_id, delta_version_id, delta_versioned_root, delta_tree_references, result) def _parse_entry(path, file_id, parent_id, last_modified, content): entry_factory = { 'dir': _dir_to_entry, 'file': _file_to_entry, 'link': _link_to_entry, 'tree': _tree_to_entry, } kind = content[0] if path.startswith('/'): raise AssertionError name = basename(path) return entry_factory[content[0]]( content, name, parent_id, file_id, last_modified) bzr-2.7.0/bzrlib/iterablefile.py0000644000000000000000000001745111673403246014746 0ustar 00000000000000# Copyright (C) 2005 Aaron Bentley, Canonical Ltd # # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import class IterableFileBase(object): """Create a file-like object from any iterable""" def __init__(self, iterable): object.__init__(self) self._iter = iterable.__iter__() self._buffer = "" self.done = False def read_n(self, length): """ >>> IterableFileBase(['This ', 'is ', 'a ', 'test.']).read_n(8) 'This is ' """ def test_length(result): if len(result) >= length: return length else: return None return self._read(test_length) def read_to(self, sequence, length=None): """ >>> f = IterableFileBase(['Th\\nis ', 'is \\n', 'a ', 'te\\nst.']) >>> f.read_to('\\n') 'Th\\n' >>> f.read_to('\\n') 'is is \\n' """ def test_contents(result): if length is not None: if len(result) >= length: return length try: return result.index(sequence)+len(sequence) except ValueError: return None return self._read(test_contents) def _read(self, result_length): """ Read data until result satisfies the condition result_length. result_length is a callable that returns None until the condition is satisfied, and returns the length of the result to use when the condition is satisfied. (i.e. 
it returns the length of the subset of the first condition match.) """ result = self._buffer while result_length(result) is None: try: result += self._iter.next() except StopIteration: self.done = True self._buffer = "" return result output_length = result_length(result) self._buffer = result[output_length:] return result[:output_length] def read_all(self): """ >>> IterableFileBase(['This ', 'is ', 'a ', 'test.']).read_all() 'This is a test.' """ def no_stop(result): return None return self._read(no_stop) def push_back(self, contents): """ >>> f = IterableFileBase(['Th\\nis ', 'is \\n', 'a ', 'te\\nst.']) >>> f.read_to('\\n') 'Th\\n' >>> f.push_back("Sh") >>> f.read_all() 'Shis is \\na te\\nst.' """ self._buffer = contents + self._buffer class IterableFile(object): """This class supplies all File methods that can be implemented cheaply.""" def __init__(self, iterable): object.__init__(self) self._file_base = IterableFileBase(iterable) self._iter = self._make_iterator() self._closed = False self.softspace = 0 def _make_iterator(self): while not self._file_base.done: self._check_closed() result = self._file_base.read_to('\n') if result != '': yield result def _check_closed(self): if self.closed: raise ValueError("File is closed.") def close(self): """ >>> f = IterableFile(['This ', 'is ', 'a ', 'test.']) >>> f.closed False >>> f.close() >>> f.closed True """ self._file_base.done = True self._closed = True closed = property(lambda x: x._closed) def flush(self): """No-op for standard compliance. >>> f = IterableFile([]) >>> f.close() >>> f.flush() Traceback (most recent call last): ValueError: File is closed. """ self._check_closed() def next(self): """Implementation of the iterator protocol's next() >>> f = IterableFile(['This \\n', 'is ', 'a ', 'test.']) >>> f.next() 'This \\n' >>> f.close() >>> f.next() Traceback (most recent call last): ValueError: File is closed. >>> f = IterableFile(['This \\n', 'is ', 'a ', 'test.\\n']) >>> f.next() 'This \\n' >>> f.next() 'is a test.\\n' >>> f.next() Traceback (most recent call last): StopIteration """ self._check_closed() return self._iter.next() def __iter__(self): """ >>> list(IterableFile(['Th\\nis ', 'is \\n', 'a ', 'te\\nst.'])) ['Th\\n', 'is is \\n', 'a te\\n', 'st.'] >>> f = IterableFile(['Th\\nis ', 'is \\n', 'a ', 'te\\nst.']) >>> f.close() >>> list(f) Traceback (most recent call last): ValueError: File is closed. """ return self def read(self, length=None): """ >>> IterableFile(['This ', 'is ', 'a ', 'test.']).read() 'This is a test.' >>> f = IterableFile(['This ', 'is ', 'a ', 'test.']) >>> f.read(10) 'This is a ' >>> f = IterableFile(['This ', 'is ', 'a ', 'test.']) >>> f.close() >>> f.read(10) Traceback (most recent call last): ValueError: File is closed. """ self._check_closed() if length is None: return self._file_base.read_all() else: return self._file_base.read_n(length) def read_to(self, sequence, size=None): """ Read characters until a sequence is found, with optional max size. The specified sequence, if found, will be included in the result >>> f = IterableFile(['Th\\nis ', 'is \\n', 'a ', 'te\\nst.']) >>> f.read_to('i') 'Th\\ni' >>> f.read_to('i') 's i' >>> f.close() >>> f.read_to('i') Traceback (most recent call last): ValueError: File is closed. 
""" self._check_closed() return self._file_base.read_to(sequence, size) def readline(self, size=None): """ >>> f = IterableFile(['Th\\nis ', 'is \\n', 'a ', 'te\\nst.']) >>> f.readline() 'Th\\n' >>> f.readline(4) 'is i' >>> f.close() >>> f.readline() Traceback (most recent call last): ValueError: File is closed. """ return self.read_to('\n', size) def readlines(self, sizehint=None): """ >>> f = IterableFile(['Th\\nis ', 'is \\n', 'a ', 'te\\nst.']) >>> f.readlines() ['Th\\n', 'is is \\n', 'a te\\n', 'st.'] >>> f = IterableFile(['Th\\nis ', 'is \\n', 'a ', 'te\\nst.']) >>> f.close() >>> f.readlines() Traceback (most recent call last): ValueError: File is closed. """ lines = [] while True: line = self.readline() if line == "": return lines if sizehint is None: lines.append(line) elif len(line) < sizehint: lines.append(line) sizehint -= len(line) else: self._file_base.push_back(line) return lines if __name__ == "__main__": import doctest doctest.testmod() bzr-2.7.0/bzrlib/knit.py0000644000000000000000000043700411673635356013275 0ustar 00000000000000# Copyright (C) 2006-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Knit versionedfile implementation. A knit is a versioned file implementation that supports efficient append only updates. Knit file layout: lifeless: the data file is made up of "delta records". each delta record has a delta header that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of the -expanded data- (ie, the delta applied to the parent). the delta also ends with a end-marker; simply "end VERSION" delta can be line or full contents.a ... the 8's there are the index number of the annotation. 
version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e 59,59,3 8 8 if ie.executable: 8 e.set('executable', 'yes') 130,130,2 8 if elt.get('executable') == 'yes': 8 ie.executable = True end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad whats in an index: 09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents 09:33 < jrydberg> lifeless: the parents are currently dictionary compressed 09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts) 09:33 < lifeless> right 09:33 < jrydberg> lifeless: the position and size is the range in the data file so the index sequence is the dictionary compressed sequence number used in the deltas to provide line annotation """ from __future__ import absolute_import from cStringIO import StringIO from itertools import izip import operator import os from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ import gzip from bzrlib import ( debug, diff, graph as _mod_graph, index as _mod_index, pack, patiencediff, static_tuple, trace, tsort, tuned_gzip, ui, ) from bzrlib.repofmt import pack_repo from bzrlib.i18n import gettext """) from bzrlib import ( annotate, errors, osutils, ) from bzrlib.errors import ( NoSuchFile, InvalidRevisionId, KnitCorrupt, KnitHeaderError, RevisionNotPresent, SHA1KnitCorrupt, ) from bzrlib.osutils import ( contains_whitespace, sha_string, sha_strings, split_lines, ) from bzrlib.versionedfile import ( _KeyRefs, AbsentContentFactory, adapter_registry, ConstantMapper, ContentFactory, sort_groupcompress, VersionedFilesWithFallbacks, ) # TODO: Split out code specific to this format into an associated object. # TODO: Can we put in some kind of value to check that the index and data # files belong together? # TODO: accommodate binaries, perhaps by storing a byte count # TODO: function to check whole file # TODO: atomically append data, then measure backwards from the cursor # position after writing to work out where it was located. we may need to # bypass python file buffering. DATA_SUFFIX = '.knit' INDEX_SUFFIX = '.kndx' _STREAM_MIN_BUFFER_SIZE = 5*1024*1024 class KnitAdapter(object): """Base class for knit record adaption.""" def __init__(self, basis_vf): """Create an adapter which accesses full texts from basis_vf. :param basis_vf: A versioned file to access basis texts of deltas from. May be None for adapters that do not need to access basis texts. 
""" self._data = KnitVersionedFiles(None, None) self._annotate_factory = KnitAnnotateFactory() self._plain_factory = KnitPlainFactory() self._basis_vf = basis_vf class FTAnnotatedToUnannotated(KnitAdapter): """An adapter from FT annotated knits to unannotated ones.""" def get_bytes(self, factory): annotated_compressed_bytes = factory._raw_record rec, contents = \ self._data._parse_record_unchecked(annotated_compressed_bytes) content = self._annotate_factory.parse_fulltext(contents, rec[1]) size, bytes = self._data._record_to_data((rec[1],), rec[3], content.text()) return bytes class DeltaAnnotatedToUnannotated(KnitAdapter): """An adapter for deltas from annotated to unannotated.""" def get_bytes(self, factory): annotated_compressed_bytes = factory._raw_record rec, contents = \ self._data._parse_record_unchecked(annotated_compressed_bytes) delta = self._annotate_factory.parse_line_delta(contents, rec[1], plain=True) contents = self._plain_factory.lower_line_delta(delta) size, bytes = self._data._record_to_data((rec[1],), rec[3], contents) return bytes class FTAnnotatedToFullText(KnitAdapter): """An adapter from FT annotated knits to unannotated ones.""" def get_bytes(self, factory): annotated_compressed_bytes = factory._raw_record rec, contents = \ self._data._parse_record_unchecked(annotated_compressed_bytes) content, delta = self._annotate_factory.parse_record(factory.key[-1], contents, factory._build_details, None) return ''.join(content.text()) class DeltaAnnotatedToFullText(KnitAdapter): """An adapter for deltas from annotated to unannotated.""" def get_bytes(self, factory): annotated_compressed_bytes = factory._raw_record rec, contents = \ self._data._parse_record_unchecked(annotated_compressed_bytes) delta = self._annotate_factory.parse_line_delta(contents, rec[1], plain=True) compression_parent = factory.parents[0] basis_entry = self._basis_vf.get_record_stream( [compression_parent], 'unordered', True).next() if basis_entry.storage_kind == 'absent': raise errors.RevisionNotPresent(compression_parent, self._basis_vf) basis_chunks = basis_entry.get_bytes_as('chunked') basis_lines = osutils.chunks_to_lines(basis_chunks) # Manually apply the delta because we have one annotated content and # one plain. basis_content = PlainKnitContent(basis_lines, compression_parent) basis_content.apply_delta(delta, rec[1]) basis_content._should_strip_eol = factory._build_details[1] return ''.join(basis_content.text()) class FTPlainToFullText(KnitAdapter): """An adapter from FT plain knits to unannotated ones.""" def get_bytes(self, factory): compressed_bytes = factory._raw_record rec, contents = \ self._data._parse_record_unchecked(compressed_bytes) content, delta = self._plain_factory.parse_record(factory.key[-1], contents, factory._build_details, None) return ''.join(content.text()) class DeltaPlainToFullText(KnitAdapter): """An adapter for deltas from annotated to unannotated.""" def get_bytes(self, factory): compressed_bytes = factory._raw_record rec, contents = \ self._data._parse_record_unchecked(compressed_bytes) delta = self._plain_factory.parse_line_delta(contents, rec[1]) compression_parent = factory.parents[0] # XXX: string splitting overhead. 
basis_entry = self._basis_vf.get_record_stream( [compression_parent], 'unordered', True).next() if basis_entry.storage_kind == 'absent': raise errors.RevisionNotPresent(compression_parent, self._basis_vf) basis_chunks = basis_entry.get_bytes_as('chunked') basis_lines = osutils.chunks_to_lines(basis_chunks) basis_content = PlainKnitContent(basis_lines, compression_parent) # Manually apply the delta because we have one annotated content and # one plain. content, _ = self._plain_factory.parse_record(rec[1], contents, factory._build_details, basis_content) return ''.join(content.text()) class KnitContentFactory(ContentFactory): """Content factory for streaming from knits. :seealso ContentFactory: """ def __init__(self, key, parents, build_details, sha1, raw_record, annotated, knit=None, network_bytes=None): """Create a KnitContentFactory for key. :param key: The key. :param parents: The parents. :param build_details: The build details as returned from get_build_details. :param sha1: The sha1 expected from the full text of this object. :param raw_record: The bytes of the knit data from disk. :param annotated: True if the raw data is annotated. :param network_bytes: None to calculate the network bytes on demand, not-none if they are already known. """ ContentFactory.__init__(self) self.sha1 = sha1 self.key = key self.parents = parents if build_details[0] == 'line-delta': kind = 'delta' else: kind = 'ft' if annotated: annotated_kind = 'annotated-' else: annotated_kind = '' self.storage_kind = 'knit-%s%s-gz' % (annotated_kind, kind) self._raw_record = raw_record self._network_bytes = network_bytes self._build_details = build_details self._knit = knit def _create_network_bytes(self): """Create a fully serialised network version for transmission.""" # storage_kind, key, parents, Noeol, raw_record key_bytes = '\x00'.join(self.key) if self.parents is None: parent_bytes = 'None:' else: parent_bytes = '\t'.join('\x00'.join(key) for key in self.parents) if self._build_details[1]: noeol = 'N' else: noeol = ' ' network_bytes = "%s\n%s\n%s\n%s%s" % (self.storage_kind, key_bytes, parent_bytes, noeol, self._raw_record) self._network_bytes = network_bytes def get_bytes_as(self, storage_kind): if storage_kind == self.storage_kind: if self._network_bytes is None: self._create_network_bytes() return self._network_bytes if ('-ft-' in self.storage_kind and storage_kind in ('chunked', 'fulltext')): adapter_key = (self.storage_kind, 'fulltext') adapter_factory = adapter_registry.get(adapter_key) adapter = adapter_factory(None) bytes = adapter.get_bytes(self) if storage_kind == 'chunked': return [bytes] else: return bytes if self._knit is not None: # Not redundant with direct conversion above - that only handles # fulltext cases. if storage_kind == 'chunked': return self._knit.get_lines(self.key[0]) elif storage_kind == 'fulltext': return self._knit.get_text(self.key[0]) raise errors.UnavailableRepresentation(self.key, storage_kind, self.storage_kind) class LazyKnitContentFactory(ContentFactory): """A ContentFactory which can either generate full text or a wire form. :seealso ContentFactory: """ def __init__(self, key, parents, generator, first): """Create a LazyKnitContentFactory. :param key: The key of the record. :param parents: The parents of the record. :param generator: A _ContentMapGenerator containing the record for this key. :param first: Is this the first content object returned from generator? 
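# Illustrative sketch (not part of the upstream module): consumers of
# get_record_stream() that need a plain fulltext can convert a knit
# record through the adapter registry, keyed by (source storage kind,
# target kind), exactly as KnitContentFactory.get_bytes_as() does above.
# `record` is assumed to be a fulltext-style factory ('knit-ft-gz' or
# 'knit-annotated-ft-gz'); delta records additionally need a basis VF
# passed to the adapter factory instead of None.

def _sketch_record_to_fulltext(record):
    adapter_factory = adapter_registry.get((record.storage_kind, 'fulltext'))
    adapter = adapter_factory(None)  # no basis needed for fulltext records
    return adapter.get_bytes(record)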
if it is, its storage kind is knit-delta-closure, otherwise it is knit-delta-closure-ref """ self.key = key self.parents = parents self.sha1 = None self._generator = generator self.storage_kind = "knit-delta-closure" if not first: self.storage_kind = self.storage_kind + "-ref" self._first = first def get_bytes_as(self, storage_kind): if storage_kind == self.storage_kind: if self._first: return self._generator._wire_bytes() else: # all the keys etc are contained in the bytes returned in the # first record. return '' if storage_kind in ('chunked', 'fulltext'): chunks = self._generator._get_one_work(self.key).text() if storage_kind == 'chunked': return chunks else: return ''.join(chunks) raise errors.UnavailableRepresentation(self.key, storage_kind, self.storage_kind) def knit_delta_closure_to_records(storage_kind, bytes, line_end): """Convert a network record to a iterator over stream records. :param storage_kind: The storage kind of the record. Must be 'knit-delta-closure'. :param bytes: The bytes of the record on the network. """ generator = _NetworkContentMapGenerator(bytes, line_end) return generator.get_record_stream() def knit_network_to_record(storage_kind, bytes, line_end): """Convert a network record to a record object. :param storage_kind: The storage kind of the record. :param bytes: The bytes of the record on the network. """ start = line_end line_end = bytes.find('\n', start) key = tuple(bytes[start:line_end].split('\x00')) start = line_end + 1 line_end = bytes.find('\n', start) parent_line = bytes[start:line_end] if parent_line == 'None:': parents = None else: parents = tuple( [tuple(segment.split('\x00')) for segment in parent_line.split('\t') if segment]) start = line_end + 1 noeol = bytes[start] == 'N' if 'ft' in storage_kind: method = 'fulltext' else: method = 'line-delta' build_details = (method, noeol) start = start + 1 raw_record = bytes[start:] annotated = 'annotated' in storage_kind return [KnitContentFactory(key, parents, build_details, None, raw_record, annotated, network_bytes=bytes)] class KnitContent(object): """Content of a knit version to which deltas can be applied. This is always stored in memory as a list of lines with \\n at the end, plus a flag saying if the final ending is really there or not, because that corresponds to the on-disk knit representation. """ def __init__(self): self._should_strip_eol = False def apply_delta(self, delta, new_version_id): """Apply delta to this object to become new_version_id.""" raise NotImplementedError(self.apply_delta) def line_delta_iter(self, new_lines): """Generate line-based delta from this content to new_lines.""" new_texts = new_lines.text() old_texts = self.text() s = patiencediff.PatienceSequenceMatcher(None, old_texts, new_texts) for tag, i1, i2, j1, j2 in s.get_opcodes(): if tag == 'equal': continue # ofrom, oto, length, data yield i1, i2, j2 - j1, new_lines._lines[j1:j2] def line_delta(self, new_lines): return list(self.line_delta_iter(new_lines)) @staticmethod def get_line_delta_blocks(knit_delta, source, target): """Extract SequenceMatcher.get_matching_blocks() from a knit delta""" target_len = len(target) s_pos = 0 t_pos = 0 for s_begin, s_end, t_len, new_text in knit_delta: true_n = s_begin - s_pos n = true_n if n > 0: # knit deltas do not provide reliable info about whether the # last line of a file matches, due to eol handling. 
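# Illustrative sketch (not part of the upstream module): the wire layout
# built by KnitContentFactory._create_network_bytes() and consumed by
# knit_network_to_record() above is storage kind, key, parents, then a
# one-byte no-eol flag glued directly onto the raw gzipped record.  This
# standalone parser mirrors that layout for a single record.

def _sketch_parse_network_record(data):
    storage_kind, rest = data.split('\n', 1)
    key_line, rest = rest.split('\n', 1)
    parent_line, rest = rest.split('\n', 1)
    key = tuple(key_line.split('\x00'))
    if parent_line == 'None:':
        parents = None
    else:
        parents = tuple(tuple(p.split('\x00'))
                        for p in parent_line.split('\t') if p)
    noeol = rest[0] == 'N'
    raw_record = rest[1:]
    return storage_kind, key, parents, noeol, raw_record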
if source[s_pos + n -1] != target[t_pos + n -1]: n-=1 if n > 0: yield s_pos, t_pos, n t_pos += t_len + true_n s_pos = s_end n = target_len - t_pos if n > 0: if source[s_pos + n -1] != target[t_pos + n -1]: n-=1 if n > 0: yield s_pos, t_pos, n yield s_pos + (target_len - t_pos), target_len, 0 class AnnotatedKnitContent(KnitContent): """Annotated content.""" def __init__(self, lines): KnitContent.__init__(self) self._lines = lines def annotate(self): """Return a list of (origin, text) for each content line.""" lines = self._lines[:] if self._should_strip_eol: origin, last_line = lines[-1] lines[-1] = (origin, last_line.rstrip('\n')) return lines def apply_delta(self, delta, new_version_id): """Apply delta to this object to become new_version_id.""" offset = 0 lines = self._lines for start, end, count, delta_lines in delta: lines[offset+start:offset+end] = delta_lines offset = offset + (start - end) + count def text(self): try: lines = [text for origin, text in self._lines] except ValueError, e: # most commonly (only?) caused by the internal form of the knit # missing annotation information because of a bug - see thread # around 20071015 raise KnitCorrupt(self, "line in annotated knit missing annotation information: %s" % (e,)) if self._should_strip_eol: lines[-1] = lines[-1].rstrip('\n') return lines def copy(self): return AnnotatedKnitContent(self._lines[:]) class PlainKnitContent(KnitContent): """Unannotated content. When annotate[_iter] is called on this content, the same version is reported for all lines. Generally, annotate[_iter] is not useful on PlainKnitContent objects. """ def __init__(self, lines, version_id): KnitContent.__init__(self) self._lines = lines self._version_id = version_id def annotate(self): """Return a list of (origin, text) for each content line.""" return [(self._version_id, line) for line in self._lines] def apply_delta(self, delta, new_version_id): """Apply delta to this object to become new_version_id.""" offset = 0 lines = self._lines for start, end, count, delta_lines in delta: lines[offset+start:offset+end] = delta_lines offset = offset + (start - end) + count self._version_id = new_version_id def copy(self): return PlainKnitContent(self._lines[:], self._version_id) def text(self): lines = self._lines if self._should_strip_eol: lines = lines[:] lines[-1] = lines[-1].rstrip('\n') return lines class _KnitFactory(object): """Base class for common Factory functions.""" def parse_record(self, version_id, record, record_details, base_content, copy_base_content=True): """Parse a record into a full content object. :param version_id: The official version id for this content :param record: The data returned by read_records_iter() :param record_details: Details about the record returned by get_build_details :param base_content: If get_build_details returns a compression_parent, you must return a base_content here, else use None :param copy_base_content: When building from the base_content, decide you can either copy it and return a new object, or modify it in place. 
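# Illustrative sketch (not part of the upstream module): the delta hunks
# consumed by KnitContent.apply_delta() above are (start, end, count,
# new_lines) tuples against the *old* text; the running offset keeps the
# slice positions valid as earlier hunks grow or shrink the line list.

def _sketch_apply_knit_delta(old_lines, delta):
    lines = list(old_lines)
    offset = 0
    for start, end, count, new_lines in delta:
        lines[offset + start:offset + end] = new_lines
        offset = offset + (start - end) + count
    return lines

# Replacing the middle line of a three-line text:
assert _sketch_apply_knit_delta(['a\n', 'b\n', 'c\n'],
                                [(1, 2, 1, ['B\n'])]) == ['a\n', 'B\n', 'c\n']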
:return: (content, delta) A Content object and possibly a line-delta, delta may be None """ method, noeol = record_details if method == 'line-delta': if copy_base_content: content = base_content.copy() else: content = base_content delta = self.parse_line_delta(record, version_id) content.apply_delta(delta, version_id) else: content = self.parse_fulltext(record, version_id) delta = None content._should_strip_eol = noeol return (content, delta) class KnitAnnotateFactory(_KnitFactory): """Factory for creating annotated Content objects.""" annotated = True def make(self, lines, version_id): num_lines = len(lines) return AnnotatedKnitContent(zip([version_id] * num_lines, lines)) def parse_fulltext(self, content, version_id): """Convert fulltext to internal representation fulltext content is of the format revid(utf8) plaintext\n internal representation is of the format: (revid, plaintext) """ # TODO: jam 20070209 The tests expect this to be returned as tuples, # but the code itself doesn't really depend on that. # Figure out a way to not require the overhead of turning the # list back into tuples. lines = [tuple(line.split(' ', 1)) for line in content] return AnnotatedKnitContent(lines) def parse_line_delta_iter(self, lines): return iter(self.parse_line_delta(lines)) def parse_line_delta(self, lines, version_id, plain=False): """Convert a line based delta into internal representation. line delta is in the form of: intstart intend intcount 1..count lines: revid(utf8) newline\n internal representation is (start, end, count, [1..count tuples (revid, newline)]) :param plain: If True, the lines are returned as a plain list without annotations, not as a list of (origin, content) tuples, i.e. (start, end, count, [1..count newline]) """ result = [] lines = iter(lines) next = lines.next cache = {} def cache_and_return(line): origin, text = line.split(' ', 1) return cache.setdefault(origin, origin), text # walk through the lines parsing. # Note that the plain test is explicitly pulled out of the # loop to minimise any performance impact if plain: for header in lines: start, end, count = [int(n) for n in header.split(',')] contents = [next().split(' ', 1)[1] for i in xrange(count)] result.append((start, end, count, contents)) else: for header in lines: start, end, count = [int(n) for n in header.split(',')] contents = [tuple(next().split(' ', 1)) for i in xrange(count)] result.append((start, end, count, contents)) return result def get_fulltext_content(self, lines): """Extract just the content lines from a fulltext.""" return (line.split(' ', 1)[1] for line in lines) def get_linedelta_content(self, lines): """Extract just the content from a line delta. This doesn't return all of the extra information stored in a delta. Only the actual content lines. """ lines = iter(lines) next = lines.next for header in lines: header = header.split(',') count = int(header[2]) for i in xrange(count): origin, text = next().split(' ', 1) yield text def lower_fulltext(self, content): """convert a fulltext content record into a serializable form. see parse_fulltext which this inverts. """ return ['%s %s' % (o, t) for o, t in content._lines] def lower_line_delta(self, delta): """convert a delta into a serializable form. See parse_line_delta which this inverts. 
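# Illustrative sketch (not part of the upstream module): the serialised
# line-delta form handled by KnitAnnotateFactory.parse_line_delta() above
# is a 'start,end,count' header followed by count lines of 'origin text'.
# This standalone parser mirrors the non-plain branch of that method.

def _sketch_parse_annotated_delta(lines):
    lines = iter(lines)
    result = []
    for header in lines:
        start, end, count = [int(n) for n in header.split(',')]
        contents = [tuple(lines.next().split(' ', 1)) for _ in xrange(count)]
        result.append((start, end, count, contents))
    return result

assert _sketch_parse_annotated_delta(['1,2,1\n', 'rev-2 B\n']) == [
    (1, 2, 1, [('rev-2', 'B\n')])]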
""" # TODO: jam 20070209 We only do the caching thing to make sure that # the origin is a valid utf-8 line, eventually we could remove it out = [] for start, end, c, lines in delta: out.append('%d,%d,%d\n' % (start, end, c)) out.extend(origin + ' ' + text for origin, text in lines) return out def annotate(self, knit, key): content = knit._get_content(key) # adjust for the fact that serialised annotations are only key suffixes # for this factory. if type(key) is tuple: prefix = key[:-1] origins = content.annotate() result = [] for origin, line in origins: result.append((prefix + (origin,), line)) return result else: # XXX: This smells a bit. Why would key ever be a non-tuple here? # Aren't keys defined to be tuples? -- spiv 20080618 return content.annotate() class KnitPlainFactory(_KnitFactory): """Factory for creating plain Content objects.""" annotated = False def make(self, lines, version_id): return PlainKnitContent(lines, version_id) def parse_fulltext(self, content, version_id): """This parses an unannotated fulltext. Note that this is not a noop - the internal representation has (versionid, line) - its just a constant versionid. """ return self.make(content, version_id) def parse_line_delta_iter(self, lines, version_id): cur = 0 num_lines = len(lines) while cur < num_lines: header = lines[cur] cur += 1 start, end, c = [int(n) for n in header.split(',')] yield start, end, c, lines[cur:cur+c] cur += c def parse_line_delta(self, lines, version_id): return list(self.parse_line_delta_iter(lines, version_id)) def get_fulltext_content(self, lines): """Extract just the content lines from a fulltext.""" return iter(lines) def get_linedelta_content(self, lines): """Extract just the content from a line delta. This doesn't return all of the extra information stored in a delta. Only the actual content lines. """ lines = iter(lines) next = lines.next for header in lines: header = header.split(',') count = int(header[2]) for i in xrange(count): yield next() def lower_fulltext(self, content): return content.text() def lower_line_delta(self, delta): out = [] for start, end, c, lines in delta: out.append('%d,%d,%d\n' % (start, end, c)) out.extend(lines) return out def annotate(self, knit, key): annotator = _KnitAnnotator(knit) return annotator.annotate_flat(key) def make_file_factory(annotated, mapper): """Create a factory for creating a file based KnitVersionedFiles. This is only functional enough to run interface tests, it doesn't try to provide a full pack environment. :param annotated: knit annotations are wanted. :param mapper: The mapper from keys to paths. """ def factory(transport): index = _KndxIndex(transport, mapper, lambda:None, lambda:True, lambda:True) access = _KnitKeyAccess(transport, mapper) return KnitVersionedFiles(index, access, annotated=annotated) return factory def make_pack_factory(graph, delta, keylength): """Create a factory for creating a pack based VersionedFiles. This is only functional enough to run interface tests, it doesn't try to provide a full pack environment. :param graph: Store a graph. :param delta: Delta compress contents. :param keylength: How long should keys be. 
""" def factory(transport): parents = graph or delta ref_length = 0 if graph: ref_length += 1 if delta: ref_length += 1 max_delta_chain = 200 else: max_delta_chain = 0 graph_index = _mod_index.InMemoryGraphIndex(reference_lists=ref_length, key_elements=keylength) stream = transport.open_write_stream('newpack') writer = pack.ContainerWriter(stream.write) writer.begin() index = _KnitGraphIndex(graph_index, lambda:True, parents=parents, deltas=delta, add_callback=graph_index.add_nodes) access = pack_repo._DirectPackAccess({}) access.set_writer(writer, graph_index, (transport, 'newpack')) result = KnitVersionedFiles(index, access, max_delta_chain=max_delta_chain) result.stream = stream result.writer = writer return result return factory def cleanup_pack_knit(versioned_files): versioned_files.stream.close() versioned_files.writer.end() def _get_total_build_size(self, keys, positions): """Determine the total bytes to build these keys. (helper function because _KnitGraphIndex and _KndxIndex work the same, but don't inherit from a common base.) :param keys: Keys that we want to build :param positions: dict of {key, (info, index_memo, comp_parent)} (such as returned by _get_components_positions) :return: Number of bytes to build those keys """ all_build_index_memos = {} build_keys = keys while build_keys: next_keys = set() for key in build_keys: # This is mostly for the 'stacked' case # Where we will be getting the data from a fallback if key not in positions: continue _, index_memo, compression_parent = positions[key] all_build_index_memos[key] = index_memo if compression_parent not in all_build_index_memos: next_keys.add(compression_parent) build_keys = next_keys return sum([index_memo[2] for index_memo in all_build_index_memos.itervalues()]) class KnitVersionedFiles(VersionedFilesWithFallbacks): """Storage for many versioned files using knit compression. Backend storage is managed by indices and data objects. :ivar _index: A _KnitGraphIndex or similar that can describe the parents, graph, compression and data location of entries in this KnitVersionedFiles. Note that this is only the index for *this* vfs; if there are fallbacks they must be queried separately. """ def __init__(self, index, data_access, max_delta_chain=200, annotated=False, reload_func=None): """Create a KnitVersionedFiles with index and data_access. :param index: The index for the knit data. :param data_access: The access object to store and retrieve knit records. :param max_delta_chain: The maximum number of deltas to permit during insertion. Set to 0 to prohibit the use of deltas. :param annotated: Set to True to cause annotations to be calculated and stored during insertion. :param reload_func: An function that can be called if we think we need to reload the pack listing and try again. See 'bzrlib.repofmt.pack_repo.AggregateIndex' for the signature. """ self._index = index self._access = data_access self._max_delta_chain = max_delta_chain if annotated: self._factory = KnitAnnotateFactory() else: self._factory = KnitPlainFactory() self._immediate_fallback_vfs = [] self._reload_func = reload_func def __repr__(self): return "%s(%r, %r)" % ( self.__class__.__name__, self._index, self._access) def without_fallbacks(self): """Return a clone of this object without any fallbacks configured.""" return KnitVersionedFiles(self._index, self._access, self._max_delta_chain, self._factory.annotated, self._reload_func) def add_fallback_versioned_files(self, a_versioned_files): """Add a source of texts for texts not present in this knit. 
:param a_versioned_files: A VersionedFiles object. """ self._immediate_fallback_vfs.append(a_versioned_files) def add_lines(self, key, parents, lines, parent_texts=None, left_matching_blocks=None, nostore_sha=None, random_id=False, check_content=True): """See VersionedFiles.add_lines().""" self._index._check_write_ok() self._check_add(key, lines, random_id, check_content) if parents is None: # The caller might pass None if there is no graph data, but kndx # indexes can't directly store that, so we give them # an empty tuple instead. parents = () line_bytes = ''.join(lines) return self._add(key, lines, parents, parent_texts, left_matching_blocks, nostore_sha, random_id, line_bytes=line_bytes) def _add_text(self, key, parents, text, nostore_sha=None, random_id=False): """See VersionedFiles._add_text().""" self._index._check_write_ok() self._check_add(key, None, random_id, check_content=False) if text.__class__ is not str: raise errors.BzrBadParameterUnicode("text") if parents is None: # The caller might pass None if there is no graph data, but kndx # indexes can't directly store that, so we give them # an empty tuple instead. parents = () return self._add(key, None, parents, None, None, nostore_sha, random_id, line_bytes=text) def _add(self, key, lines, parents, parent_texts, left_matching_blocks, nostore_sha, random_id, line_bytes): """Add a set of lines on top of version specified by parents. Any versions not present will be converted into ghosts. :param lines: A list of strings where each one is a single line (has a single newline at the end of the string) This is now optional (callers can pass None). It is left in its location for backwards compatibility. It should ''.join(lines) must == line_bytes :param line_bytes: A single string containing the content We pass both lines and line_bytes because different routes bring the values to this function. And for memory efficiency, we don't want to have to split/join on-demand. """ # first thing, if the content is something we don't need to store, find # that out. digest = sha_string(line_bytes) if nostore_sha == digest: raise errors.ExistingContent present_parents = [] if parent_texts is None: parent_texts = {} # Do a single query to ascertain parent presence; we only compress # against parents in the same kvf. present_parent_map = self._index.get_parent_map(parents) for parent in parents: if parent in present_parent_map: present_parents.append(parent) # Currently we can only compress against the left most present parent. if (len(present_parents) == 0 or present_parents[0] != parents[0]): delta = False else: # To speed the extract of texts the delta chain is limited # to a fixed number of deltas. This should minimize both # I/O and the time spend applying deltas. delta = self._check_should_delta(present_parents[0]) text_length = len(line_bytes) options = [] no_eol = False # Note: line_bytes is not modified to add a newline, that is tracked # via the no_eol flag. 'lines' *is* modified, because that is the # general values needed by the Content code. 
if line_bytes and line_bytes[-1] != '\n': options.append('no-eol') no_eol = True # Copy the existing list, or create a new one if lines is None: lines = osutils.split_lines(line_bytes) else: lines = lines[:] # Replace the last line with one that ends in a final newline lines[-1] = lines[-1] + '\n' if lines is None: lines = osutils.split_lines(line_bytes) for element in key[:-1]: if type(element) is not str: raise TypeError("key contains non-strings: %r" % (key,)) if key[-1] is None: key = key[:-1] + ('sha1:' + digest,) elif type(key[-1]) is not str: raise TypeError("key contains non-strings: %r" % (key,)) # Knit hunks are still last-element only version_id = key[-1] content = self._factory.make(lines, version_id) if no_eol: # Hint to the content object that its text() call should strip the # EOL. content._should_strip_eol = True if delta or (self._factory.annotated and len(present_parents) > 0): # Merge annotations from parent texts if needed. delta_hunks = self._merge_annotations(content, present_parents, parent_texts, delta, self._factory.annotated, left_matching_blocks) if delta: options.append('line-delta') store_lines = self._factory.lower_line_delta(delta_hunks) size, bytes = self._record_to_data(key, digest, store_lines) else: options.append('fulltext') # isinstance is slower and we have no hierarchy. if self._factory.__class__ is KnitPlainFactory: # Use the already joined bytes saving iteration time in # _record_to_data. dense_lines = [line_bytes] if no_eol: dense_lines.append('\n') size, bytes = self._record_to_data(key, digest, lines, dense_lines) else: # get mixed annotation + content and feed it into the # serialiser. store_lines = self._factory.lower_fulltext(content) size, bytes = self._record_to_data(key, digest, store_lines) access_memo = self._access.add_raw_records([(key, size)], bytes)[0] self._index.add_records( ((key, options, access_memo, parents),), random_id=random_id) return digest, text_length, content def annotate(self, key): """See VersionedFiles.annotate.""" return self._factory.annotate(self, key) def get_annotator(self): return _KnitAnnotator(self) def check(self, progress_bar=None, keys=None): """See VersionedFiles.check().""" if keys is None: return self._logical_check() else: # At the moment, check does not extra work over get_record_stream return self.get_record_stream(keys, 'unordered', True) def _logical_check(self): # This doesn't actually test extraction of everything, but that will # impact 'bzr check' substantially, and needs to be integrated with # care. However, it does check for the obvious problem of a delta with # no basis. keys = self._index.keys() parent_map = self.get_parent_map(keys) for key in keys: if self._index.get_method(key) != 'fulltext': compression_parent = parent_map[key][0] if compression_parent not in parent_map: raise errors.KnitCorrupt(self, "Missing basis parent %s for %s" % ( compression_parent, key)) for fallback_vfs in self._immediate_fallback_vfs: fallback_vfs.check() def _check_add(self, key, lines, random_id, check_content): """check that version_id and lines are safe to add.""" version_id = key[-1] if version_id is not None: if contains_whitespace(version_id): raise InvalidRevisionId(version_id, self) self.check_not_reserved_id(version_id) # TODO: If random_id==False and the key is already present, we should # probably check that the existing content is identical to what is # being inserted, and otherwise raise an exception. This would make # the bundle code simpler. 
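# Illustrative sketch (not part of the upstream module): _add() above
# never stores a text whose last line lacks a newline; instead it appends
# one and records a 'no-eol' option so text() can strip it again later.
# str.splitlines(True) stands in here for osutils.split_lines().

def _sketch_normalise_for_knit(line_bytes):
    options = []
    lines = line_bytes.splitlines(True)
    if line_bytes and not line_bytes.endswith('\n'):
        options.append('no-eol')
        lines[-1] = lines[-1] + '\n'
    return options, lines

assert _sketch_normalise_for_knit('a\nb') == (['no-eol'], ['a\n', 'b\n'])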
if check_content: self._check_lines_not_unicode(lines) self._check_lines_are_lines(lines) def _check_header(self, key, line): rec = self._split_header(line) self._check_header_version(rec, key[-1]) return rec def _check_header_version(self, rec, version_id): """Checks the header version on original format knit records. These have the last component of the key embedded in the record. """ if rec[1] != version_id: raise KnitCorrupt(self, 'unexpected version, wanted %r, got %r' % (version_id, rec[1])) def _check_should_delta(self, parent): """Iterate back through the parent listing, looking for a fulltext. This is used when we want to decide whether to add a delta or a new fulltext. It searches for _max_delta_chain parents. When it finds a fulltext parent, it sees if the total size of the deltas leading up to it is large enough to indicate that we want a new full text anyway. Return True if we should create a new delta, False if we should use a full text. """ delta_size = 0 fulltext_size = None for count in xrange(self._max_delta_chain): try: # Note that this only looks in the index of this particular # KnitVersionedFiles, not in the fallbacks. This ensures that # we won't store a delta spanning physical repository # boundaries. build_details = self._index.get_build_details([parent]) parent_details = build_details[parent] except (RevisionNotPresent, KeyError), e: # Some basis is not locally present: always fulltext return False index_memo, compression_parent, _, _ = parent_details _, _, size = index_memo if compression_parent is None: fulltext_size = size break delta_size += size # We don't explicitly check for presence because this is in an # inner loop, and if it's missing it'll fail anyhow. parent = compression_parent else: # We couldn't find a fulltext, so we must create a new one return False # Simple heuristic - if the total I/O wold be greater as a delta than # the originally installed fulltext, we create a new fulltext. return fulltext_size > delta_size def _build_details_to_components(self, build_details): """Convert a build_details tuple to a position tuple.""" # record_details, access_memo, compression_parent return build_details[3], build_details[0], build_details[1] def _get_components_positions(self, keys, allow_missing=False): """Produce a map of position data for the components of keys. This data is intended to be used for retrieving the knit records. A dict of key to (record_details, index_memo, next, parents) is returned. * method is the way referenced data should be applied. * index_memo is the handle to pass to the data access to actually get the data * next is the build-parent of the version, or None for fulltexts. * parents is the version_ids of the parents of this version :param allow_missing: If True do not raise an error on a missing component, just ignore it. 
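# Illustrative sketch (not part of the upstream module): a standalone
# restatement of the _check_should_delta() heuristic above, with plain
# dicts standing in for the index.  `sizes` maps key -> stored byte size
# and `compression_parents` maps key -> basis key (None for fulltexts).

def _sketch_should_delta(sizes, compression_parents, parent, max_chain=200):
    delta_size = 0
    for _ in xrange(max_chain):
        if parent not in sizes:
            return False                 # basis not locally present
        if compression_parents[parent] is None:
            # Found the nearest fulltext: store a delta only if the chain
            # is still cheaper to read back than that fulltext.
            return sizes[parent] > delta_size
        delta_size += sizes[parent]
        parent = compression_parents[parent]
    return False                         # chain too long: new fulltext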
""" component_data = {} pending_components = keys while pending_components: build_details = self._index.get_build_details(pending_components) current_components = set(pending_components) pending_components = set() for key, details in build_details.iteritems(): (index_memo, compression_parent, parents, record_details) = details method = record_details[0] if compression_parent is not None: pending_components.add(compression_parent) component_data[key] = self._build_details_to_components(details) missing = current_components.difference(build_details) if missing and not allow_missing: raise errors.RevisionNotPresent(missing.pop(), self) return component_data def _get_content(self, key, parent_texts={}): """Returns a content object that makes up the specified version.""" cached_version = parent_texts.get(key, None) if cached_version is not None: # Ensure the cache dict is valid. if not self.get_parent_map([key]): raise RevisionNotPresent(key, self) return cached_version generator = _VFContentMapGenerator(self, [key]) return generator._get_content(key) def get_parent_map(self, keys): """Get a map of the graph parents of keys. :param keys: The keys to look up parents for. :return: A mapping from keys to parents. Absent keys are absent from the mapping. """ return self._get_parent_map_with_sources(keys)[0] def _get_parent_map_with_sources(self, keys): """Get a map of the parents of keys. :param keys: The keys to look up parents for. :return: A tuple. The first element is a mapping from keys to parents. Absent keys are absent from the mapping. The second element is a list with the locations each key was found in. The first element is the in-this-knit parents, the second the first fallback source, and so on. """ result = {} sources = [self._index] + self._immediate_fallback_vfs source_results = [] missing = set(keys) for source in sources: if not missing: break new_result = source.get_parent_map(missing) source_results.append(new_result) result.update(new_result) missing.difference_update(set(new_result)) return result, source_results def _get_record_map(self, keys, allow_missing=False): """Produce a dictionary of knit records. :return: {key:(record, record_details, digest, next)} * record: data returned from read_records (a KnitContentobject) * record_details: opaque information to pass to parse_record * digest: SHA1 digest of the full text after all steps are done * next: build-parent of the version, i.e. the leftmost ancestor. Will be None if the record is not a delta. :param keys: The keys to build a map for :param allow_missing: If some records are missing, rather than error, just return the data that could be generated. """ raw_map = self._get_record_map_unparsed(keys, allow_missing=allow_missing) return self._raw_map_to_record_map(raw_map) def _raw_map_to_record_map(self, raw_map): """Parse the contents of _get_record_map_unparsed. :return: see _get_record_map. """ result = {} for key in raw_map: data, record_details, next = raw_map[key] content, digest = self._parse_record(key[-1], data) result[key] = content, record_details, digest, next return result def _get_record_map_unparsed(self, keys, allow_missing=False): """Get the raw data for reconstructing keys without parsing it. :return: A dict suitable for parsing via _raw_map_to_record_map. key-> raw_bytes, (method, noeol), compression_parent """ # This retries the whole request if anything fails. Potentially we # could be a bit more selective. 
We could track the keys whose records # we have successfully found, and then only request the new records # from there. However, _get_components_positions grabs the whole build # chain, which means we'll likely try to grab the same records again # anyway. Also, can the build chains change as part of a pack # operation? We wouldn't want to end up with a broken chain. while True: try: position_map = self._get_components_positions(keys, allow_missing=allow_missing) # key = component_id, r = record_details, i_m = index_memo, # n = next records = [(key, i_m) for key, (r, i_m, n) in position_map.iteritems()] # Sort by the index memo, so that we request records from the # same pack file together, and in forward-sorted order records.sort(key=operator.itemgetter(1)) raw_record_map = {} for key, data in self._read_records_iter_unchecked(records): (record_details, index_memo, next) = position_map[key] raw_record_map[key] = data, record_details, next return raw_record_map except errors.RetryWithNewPacks, e: self._access.reload_or_raise(e) @classmethod def _split_by_prefix(cls, keys): """For the given keys, split them up based on their prefix. To keep memory pressure somewhat under control, split the requests back into per-file-id requests, otherwise "bzr co" extracts the full tree into memory before writing it to disk. This should be revisited if _get_content_maps() can ever cross file-id boundaries. The keys for a given file_id are kept in the same relative order. Ordering between file_ids is not, though prefix_order will return the order that the key was first seen. :param keys: An iterable of key tuples :return: (split_map, prefix_order) split_map A dictionary mapping prefix => keys prefix_order The order that we saw the various prefixes """ split_by_prefix = {} prefix_order = [] for key in keys: if len(key) == 1: prefix = '' else: prefix = key[0] if prefix in split_by_prefix: split_by_prefix[prefix].append(key) else: split_by_prefix[prefix] = [key] prefix_order.append(prefix) return split_by_prefix, prefix_order def _group_keys_for_io(self, keys, non_local_keys, positions, _min_buffer_size=_STREAM_MIN_BUFFER_SIZE): """For the given keys, group them into 'best-sized' requests. The idea is to avoid making 1 request per file, but to never try to unpack an entire 1.5GB source tree in a single pass. Also when possible, we should try to group requests to the same pack file together. :return: list of (keys, non_local) tuples that indicate what keys should be fetched next. """ # TODO: Ideally we would group on 2 factors. We want to extract texts # from the same pack file together, and we want to extract all # the texts for a given build-chain together. Ultimately it # probably needs a better global view. 
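# Illustrative sketch (not part of the upstream module): the regrouping
# performed by _split_by_prefix() above, keeping keys of one file id
# together and remembering the order in which prefixes were first seen.

def _sketch_split_by_prefix(keys):
    split_map, prefix_order = {}, []
    for key in keys:
        prefix = '' if len(key) == 1 else key[0]
        if prefix not in split_map:
            split_map[prefix] = []
            prefix_order.append(prefix)
        split_map[prefix].append(key)
    return split_map, prefix_order

assert _sketch_split_by_prefix([('f1', 'a'), ('f2', 'b'), ('f1', 'c')]) == (
    {'f1': [('f1', 'a'), ('f1', 'c')], 'f2': [('f2', 'b')]}, ['f1', 'f2'])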
total_keys = len(keys) prefix_split_keys, prefix_order = self._split_by_prefix(keys) prefix_split_non_local_keys, _ = self._split_by_prefix(non_local_keys) cur_keys = [] cur_non_local = set() cur_size = 0 result = [] sizes = [] for prefix in prefix_order: keys = prefix_split_keys[prefix] non_local = prefix_split_non_local_keys.get(prefix, []) this_size = self._index._get_total_build_size(keys, positions) cur_size += this_size cur_keys.extend(keys) cur_non_local.update(non_local) if cur_size > _min_buffer_size: result.append((cur_keys, cur_non_local)) sizes.append(cur_size) cur_keys = [] cur_non_local = set() cur_size = 0 if cur_keys: result.append((cur_keys, cur_non_local)) sizes.append(cur_size) return result def get_record_stream(self, keys, ordering, include_delta_closure): """Get a stream of records for keys. :param keys: The keys to include. :param ordering: Either 'unordered' or 'topological'. A topologically sorted stream has compression parents strictly before their children. :param include_delta_closure: If True then the closure across any compression parents will be included (in the opaque data). :return: An iterator of ContentFactory objects, each of which is only valid until the iterator is advanced. """ # keys might be a generator keys = set(keys) if not keys: return if not self._index.has_graph: # Cannot sort when no graph has been stored. ordering = 'unordered' remaining_keys = keys while True: try: keys = set(remaining_keys) for content_factory in self._get_remaining_record_stream(keys, ordering, include_delta_closure): remaining_keys.discard(content_factory.key) yield content_factory return except errors.RetryWithNewPacks, e: self._access.reload_or_raise(e) def _get_remaining_record_stream(self, keys, ordering, include_delta_closure): """This function is the 'retry' portion for get_record_stream.""" if include_delta_closure: positions = self._get_components_positions(keys, allow_missing=True) else: build_details = self._index.get_build_details(keys) # map from key to # (record_details, access_memo, compression_parent_key) positions = dict((key, self._build_details_to_components(details)) for key, details in build_details.iteritems()) absent_keys = keys.difference(set(positions)) # There may be more absent keys : if we're missing the basis component # and are trying to include the delta closure. # XXX: We should not ever need to examine remote sources because we do # not permit deltas across versioned files boundaries. if include_delta_closure: needed_from_fallback = set() # Build up reconstructable_keys dict. key:True in this dict means # the key can be reconstructed. reconstructable_keys = {} for key in keys: # the delta chain try: chain = [key, positions[key][2]] except KeyError: needed_from_fallback.add(key) continue result = True while chain[-1] is not None: if chain[-1] in reconstructable_keys: result = reconstructable_keys[chain[-1]] break else: try: chain.append(positions[chain[-1]][2]) except KeyError: # missing basis component needed_from_fallback.add(chain[-1]) result = True break for chain_key in chain[:-1]: reconstructable_keys[chain_key] = result if not result: needed_from_fallback.add(key) # Double index lookups here : need a unified api ? 
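# Hedged usage sketch (not part of the upstream module): pulling full
# texts out of get_record_stream() above.  `vf` is assumed to be any
# KnitVersionedFiles instance; absent keys are simply skipped here, and
# include_delta_closure=True lets the texts be rebuilt from the stream.

def _sketch_extract_texts(vf, keys):
    texts = {}
    for record in vf.get_record_stream(keys, 'topological', True):
        if record.storage_kind == 'absent':
            continue
        texts[record.key] = ''.join(record.get_bytes_as('chunked'))
    return texts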
global_map, parent_maps = self._get_parent_map_with_sources(keys) if ordering in ('topological', 'groupcompress'): if ordering == 'topological': # Global topological sort present_keys = tsort.topo_sort(global_map) else: present_keys = sort_groupcompress(global_map) # Now group by source: source_keys = [] current_source = None for key in present_keys: for parent_map in parent_maps: if key in parent_map: key_source = parent_map break if current_source is not key_source: source_keys.append((key_source, [])) current_source = key_source source_keys[-1][1].append(key) else: if ordering != 'unordered': raise AssertionError('valid values for ordering are:' ' "unordered", "groupcompress" or "topological" not: %r' % (ordering,)) # Just group by source; remote sources first. present_keys = [] source_keys = [] for parent_map in reversed(parent_maps): source_keys.append((parent_map, [])) for key in parent_map: present_keys.append(key) source_keys[-1][1].append(key) # We have been requested to return these records in an order that # suits us. So we ask the index to give us an optimally sorted # order. for source, sub_keys in source_keys: if source is parent_maps[0]: # Only sort the keys for this VF self._index._sort_keys_by_io(sub_keys, positions) absent_keys = keys - set(global_map) for key in absent_keys: yield AbsentContentFactory(key) # restrict our view to the keys we can answer. # XXX: Memory: TODO: batch data here to cap buffered data at (say) 1MB. # XXX: At that point we need to consider the impact of double reads by # utilising components multiple times. if include_delta_closure: # XXX: get_content_maps performs its own index queries; allow state # to be passed in. non_local_keys = needed_from_fallback - absent_keys for keys, non_local_keys in self._group_keys_for_io(present_keys, non_local_keys, positions): generator = _VFContentMapGenerator(self, keys, non_local_keys, global_map, ordering=ordering) for record in generator.get_record_stream(): yield record else: for source, keys in source_keys: if source is parent_maps[0]: # this KnitVersionedFiles records = [(key, positions[key][1]) for key in keys] for key, raw_data in self._read_records_iter_unchecked(records): (record_details, index_memo, _) = positions[key] yield KnitContentFactory(key, global_map[key], record_details, None, raw_data, self._factory.annotated, None) else: vf = self._immediate_fallback_vfs[parent_maps.index(source) - 1] for record in vf.get_record_stream(keys, ordering, include_delta_closure): yield record def get_sha1s(self, keys): """See VersionedFiles.get_sha1s().""" missing = set(keys) record_map = self._get_record_map(missing, allow_missing=True) result = {} for key, details in record_map.iteritems(): if key not in missing: continue # record entry 2 is the 'digest'. result[key] = details[2] missing.difference_update(set(result)) for source in self._immediate_fallback_vfs: if not missing: break new_result = source.get_sha1s(missing) result.update(new_result) missing.difference_update(set(new_result)) return result def insert_record_stream(self, stream): """Insert a record stream into this container. :param stream: A stream of records to insert. :return: None :seealso VersionedFiles.get_record_stream: """ def get_adapter(adapter_key): try: return adapters[adapter_key] except KeyError: adapter_factory = adapter_registry.get(adapter_key) adapter = adapter_factory(self) adapters[adapter_key] = adapter return adapter delta_types = set() if self._factory.annotated: # self is annotated, we need annotated knits to use directly. 
annotated = "annotated-" convertibles = [] else: # self is not annotated, but we can strip annotations cheaply. annotated = "" convertibles = set(["knit-annotated-ft-gz"]) if self._max_delta_chain: delta_types.add("knit-annotated-delta-gz") convertibles.add("knit-annotated-delta-gz") # The set of types we can cheaply adapt without needing basis texts. native_types = set() if self._max_delta_chain: native_types.add("knit-%sdelta-gz" % annotated) delta_types.add("knit-%sdelta-gz" % annotated) native_types.add("knit-%sft-gz" % annotated) knit_types = native_types.union(convertibles) adapters = {} # Buffer all index entries that we can't add immediately because their # basis parent is missing. We don't buffer all because generating # annotations may require access to some of the new records. However we # can't generate annotations from new deltas until their basis parent # is present anyway, so we get away with not needing an index that # includes the new keys. # # See about ordering of compression # parents in the records - to be conservative, we insist that all # parents must be present to avoid expanding to a fulltext. # # key = basis_parent, value = index entry to add buffered_index_entries = {} for record in stream: kind = record.storage_kind if kind.startswith('knit-') and kind.endswith('-gz'): # Check that the ID in the header of the raw knit bytes matches # the record metadata. raw_data = record._raw_record df, rec = self._parse_record_header(record.key, raw_data) df.close() buffered = False parents = record.parents if record.storage_kind in delta_types: # TODO: eventually the record itself should track # compression_parent compression_parent = parents[0] else: compression_parent = None # Raise an error when a record is missing. if record.storage_kind == 'absent': raise RevisionNotPresent([record.key], self) elif ((record.storage_kind in knit_types) and (compression_parent is None or not self._immediate_fallback_vfs or self._index.has_key(compression_parent) or not self.has_key(compression_parent))): # we can insert the knit record literally if either it has no # compression parent OR we already have its basis in this kvf # OR the basis is not present even in the fallbacks. In the # last case it will either turn up later in the stream and all # will be well, or it won't turn up at all and we'll raise an # error at the end. # # TODO: self.has_key is somewhat redundant with # self._index.has_key; we really want something that directly # asks if it's only present in the fallbacks. -- mbp 20081119 if record.storage_kind not in native_types: try: adapter_key = (record.storage_kind, "knit-delta-gz") adapter = get_adapter(adapter_key) except KeyError: adapter_key = (record.storage_kind, "knit-ft-gz") adapter = get_adapter(adapter_key) bytes = adapter.get_bytes(record) else: # It's a knit record, it has a _raw_record field (even if # it was reconstituted from a network stream). bytes = record._raw_record options = [record._build_details[0]] if record._build_details[1]: options.append('no-eol') # Just blat it across. # Note: This does end up adding data on duplicate keys. As # modern repositories use atomic insertions this should not # lead to excessive growth in the event of interrupted fetches. # 'knit' repositories may suffer excessive growth, but as a # deprecated format this is tolerable. It can be fixed if # needed by in the kndx index support raising on a duplicate # add with identical parents and options. 
access_memo = self._access.add_raw_records( [(record.key, len(bytes))], bytes)[0] index_entry = (record.key, options, access_memo, parents) if 'fulltext' not in options: # Not a fulltext, so we need to make sure the compression # parent will also be present. # Note that pack backed knits don't need to buffer here # because they buffer all writes to the transaction level, # but we don't expose that difference at the index level. If # the query here has sufficient cost to show up in # profiling we should do that. # # They're required to be physically in this # KnitVersionedFiles, not in a fallback. if not self._index.has_key(compression_parent): pending = buffered_index_entries.setdefault( compression_parent, []) pending.append(index_entry) buffered = True if not buffered: self._index.add_records([index_entry]) elif record.storage_kind == 'chunked': self.add_lines(record.key, parents, osutils.chunks_to_lines(record.get_bytes_as('chunked'))) else: # Not suitable for direct insertion as a # delta, either because it's not the right format, or this # KnitVersionedFiles doesn't permit deltas (_max_delta_chain == # 0) or because it depends on a base only present in the # fallback kvfs. self._access.flush() try: # Try getting a fulltext directly from the record. bytes = record.get_bytes_as('fulltext') except errors.UnavailableRepresentation: adapter_key = record.storage_kind, 'fulltext' adapter = get_adapter(adapter_key) bytes = adapter.get_bytes(record) lines = split_lines(bytes) try: self.add_lines(record.key, parents, lines) except errors.RevisionAlreadyPresent: pass # Add any records whose basis parent is now available. if not buffered: added_keys = [record.key] while added_keys: key = added_keys.pop(0) if key in buffered_index_entries: index_entries = buffered_index_entries[key] self._index.add_records(index_entries) added_keys.extend( [index_entry[0] for index_entry in index_entries]) del buffered_index_entries[key] if buffered_index_entries: # There were index entries buffered at the end of the stream, # So these need to be added (if the index supports holding such # entries for later insertion) all_entries = [] for key in buffered_index_entries: index_entries = buffered_index_entries[key] all_entries.extend(index_entries) self._index.add_records( all_entries, missing_compression_parents=True) def get_missing_compression_parent_keys(self): """Return an iterable of keys of missing compression parents. Check this after calling insert_record_stream to find out if there are any missing compression parents. If there are, the records that depend on them are not able to be inserted safely. For atomic KnitVersionedFiles built on packs, the transaction should be aborted or suspended - commit will fail at this point. Nonatomic knits will error earlier because they have no staging area to put pending entries into. """ return self._index.get_missing_compression_parents() def iter_lines_added_or_present_in_keys(self, keys, pb=None): """Iterate over the lines in the versioned files from keys. This may return lines from other keys. Each item the returned iterator yields is a tuple of a line and a text version that that line is present in (not introduced in). Ordering of results is in whatever order is most suitable for the underlying storage format. If a progress bar is supplied, it may be used to indicate progress. The caller is responsible for cleaning up progress bars (because this is an iterator). NOTES: * Lines are normalised by the underlying store: they will all have \\n terminators. 
* Lines are returned in arbitrary order. * If a requested key did not change any lines (or didn't have any lines), it may not be mentioned at all in the result. :param pb: Progress bar supplied by caller. :return: An iterator over (line, key). """ if pb is None: pb = ui.ui_factory.nested_progress_bar() keys = set(keys) total = len(keys) done = False while not done: try: # we don't care about inclusions, the caller cares. # but we need to setup a list of records to visit. # we need key, position, length key_records = [] build_details = self._index.get_build_details(keys) for key, details in build_details.iteritems(): if key in keys: key_records.append((key, details[0])) records_iter = enumerate(self._read_records_iter(key_records)) for (key_idx, (key, data, sha_value)) in records_iter: pb.update(gettext('Walking content'), key_idx, total) compression_parent = build_details[key][1] if compression_parent is None: # fulltext line_iterator = self._factory.get_fulltext_content(data) else: # Delta line_iterator = self._factory.get_linedelta_content(data) # Now that we are yielding the data for this key, remove it # from the list keys.remove(key) # XXX: It might be more efficient to yield (key, # line_iterator) in the future. However for now, this is a # simpler change to integrate into the rest of the # codebase. RBC 20071110 for line in line_iterator: yield line, key done = True except errors.RetryWithNewPacks, e: self._access.reload_or_raise(e) # If there are still keys we've not yet found, we look in the fallback # vfs, and hope to find them there. Note that if the keys are found # but had no changes or no content, the fallback may not return # anything. if keys and not self._immediate_fallback_vfs: # XXX: strictly the second parameter is meant to be the file id # but it's not easily accessible here. raise RevisionNotPresent(keys, repr(self)) for source in self._immediate_fallback_vfs: if not keys: break source_keys = set() for line, key in source.iter_lines_added_or_present_in_keys(keys): source_keys.add(key) yield line, key keys.difference_update(source_keys) pb.update(gettext('Walking content'), total, total) def _make_line_delta(self, delta_seq, new_content): """Generate a line delta from delta_seq and new_content.""" diff_hunks = [] for op in delta_seq.get_opcodes(): if op[0] == 'equal': continue diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]])) return diff_hunks def _merge_annotations(self, content, parents, parent_texts={}, delta=None, annotated=None, left_matching_blocks=None): """Merge annotations for content and generate deltas. This is done by comparing the annotations based on changes to the text and generating a delta on the resulting full texts. If annotations are not being created then a simple delta is created. """ if left_matching_blocks is not None: delta_seq = diff._PrematchedMatcher(left_matching_blocks) else: delta_seq = None if annotated: for parent_key in parents: merge_content = self._get_content(parent_key, parent_texts) if (parent_key == parents[0] and delta_seq is not None): seq = delta_seq else: seq = patiencediff.PatienceSequenceMatcher( None, merge_content.text(), content.text()) for i, j, n in seq.get_matching_blocks(): if n == 0: continue # this copies (origin, text) pairs across to the new # content for any line that matches the last-checked # parent. content._lines[j:j+n] = merge_content._lines[i:i+n] # XXX: Robert says the following block is a workaround for a # now-fixed bug and it can probably be deleted. 
-- mbp 20080618 if content._lines and content._lines[-1][1][-1] != '\n': # The copied annotation was from a line without a trailing EOL, # reinstate one for the content object, to ensure correct # serialization. line = content._lines[-1][1] + '\n' content._lines[-1] = (content._lines[-1][0], line) if delta: if delta_seq is None: reference_content = self._get_content(parents[0], parent_texts) new_texts = content.text() old_texts = reference_content.text() delta_seq = patiencediff.PatienceSequenceMatcher( None, old_texts, new_texts) return self._make_line_delta(delta_seq, content) def _parse_record(self, version_id, data): """Parse an original format knit record. These have the last element of the key only present in the stored data. """ rec, record_contents = self._parse_record_unchecked(data) self._check_header_version(rec, version_id) return record_contents, rec[3] def _parse_record_header(self, key, raw_data): """Parse a record header for consistency. :return: the header and the decompressor stream. as (stream, header_record) """ df = gzip.GzipFile(mode='rb', fileobj=StringIO(raw_data)) try: # Current serialise rec = self._check_header(key, df.readline()) except Exception, e: raise KnitCorrupt(self, "While reading {%s} got %s(%s)" % (key, e.__class__.__name__, str(e))) return df, rec def _parse_record_unchecked(self, data): # profiling notes: # 4168 calls in 2880 217 internal # 4168 calls to _parse_record_header in 2121 # 4168 calls to readlines in 330 df = gzip.GzipFile(mode='rb', fileobj=StringIO(data)) try: record_contents = df.readlines() except Exception, e: raise KnitCorrupt(self, "Corrupt compressed record %r, got %s(%s)" % (data, e.__class__.__name__, str(e))) header = record_contents.pop(0) rec = self._split_header(header) last_line = record_contents.pop() if len(record_contents) != int(rec[2]): raise KnitCorrupt(self, 'incorrect number of lines %s != %s' ' for version {%s} %s' % (len(record_contents), int(rec[2]), rec[1], record_contents)) if last_line != 'end %s\n' % rec[1]: raise KnitCorrupt(self, 'unexpected version end line %r, wanted %r' % (last_line, rec[1])) df.close() return rec, record_contents def _read_records_iter(self, records): """Read text records from data file and yield result. The result will be returned in whatever is the fastest to read. Not by the order requested. Also, multiple requests for the same record will only yield 1 response. :param records: A list of (key, access_memo) entries :return: Yields (key, contents, digest) in the order read, not the order requested """ if not records: return # XXX: This smells wrong, IO may not be getting ordered right. needed_records = sorted(set(records), key=operator.itemgetter(1)) if not needed_records: return # The transport optimizes the fetching as well # (ie, reads continuous ranges.) raw_data = self._access.get_raw_records( [index_memo for key, index_memo in needed_records]) for (key, index_memo), data in \ izip(iter(needed_records), raw_data): content, digest = self._parse_record(key[-1], data) yield key, content, digest def _read_records_iter_raw(self, records): """Read text records from data file and yield raw data. This unpacks enough of the text record to validate the id is as expected but thats all. Each item the iterator yields is (key, bytes, expected_sha1_of_full_text). """ for key, data in self._read_records_iter_unchecked(records): # validate the header (note that we can only use the suffix in # current knit records). 
df, rec = self._parse_record_header(key, data) df.close() yield key, data, rec[3] def _read_records_iter_unchecked(self, records): """Read text records from data file and yield raw data. No validation is done. Yields tuples of (key, data). """ # setup an iterator of the external records: # uses readv so nice and fast we hope. if len(records): # grab the disk data needed. needed_offsets = [index_memo for key, index_memo in records] raw_records = self._access.get_raw_records(needed_offsets) for key, index_memo in records: data = raw_records.next() yield key, data def _record_to_data(self, key, digest, lines, dense_lines=None): """Convert key, digest, lines into a raw data block. :param key: The key of the record. Currently keys are always serialised using just the trailing component. :param dense_lines: The bytes of lines but in a denser form. For instance, if lines is a list of 1000 bytestrings each ending in \\n, dense_lines may be a list with one line in it, containing all the 1000's lines and their \\n's. Using dense_lines if it is already known is a win because the string join to create bytes in this function spends less time resizing the final string. :return: (len, a StringIO instance with the raw data ready to read.) """ chunks = ["version %s %d %s\n" % (key[-1], len(lines), digest)] chunks.extend(dense_lines or lines) chunks.append("end %s\n" % key[-1]) for chunk in chunks: if type(chunk) is not str: raise AssertionError( 'data must be plain bytes was %s' % type(chunk)) if lines and lines[-1][-1] != '\n': raise ValueError('corrupt lines value %r' % lines) compressed_bytes = tuned_gzip.chunks_to_gzip(chunks) return len(compressed_bytes), compressed_bytes def _split_header(self, line): rec = line.split() if len(rec) != 4: raise KnitCorrupt(self, 'unexpected number of elements in record header') return rec def keys(self): """See VersionedFiles.keys.""" if 'evil' in debug.debug_flags: trace.mutter_callsite(2, "keys scales with size of history") sources = [self._index] + self._immediate_fallback_vfs result = set() for source in sources: result.update(source.keys()) return result class _ContentMapGenerator(object): """Generate texts or expose raw deltas for a set of texts.""" def __init__(self, ordering='unordered'): self._ordering = ordering def _get_content(self, key): """Get the content object for key.""" # Note that _get_content is only called when the _ContentMapGenerator # has been constructed with just one key requested for reconstruction. if key in self.nonlocal_keys: record = self.get_record_stream().next() # Create a content object on the fly lines = osutils.chunks_to_lines(record.get_bytes_as('chunked')) return PlainKnitContent(lines, record.key) else: # local keys we can ask for directly return self._get_one_work(key) def get_record_stream(self): """Get a record stream for the keys requested during __init__.""" for record in self._work(): yield record def _work(self): """Produce maps of text and KnitContents as dicts. :return: (text_map, content_map) where text_map contains the texts for the requested versions and content_map contains the KnitContents. """ # NB: By definition we never need to read remote sources unless texts # are requested from them: we don't delta across stores - and we # explicitly do not want to to prevent data loss situations. if self.global_map is None: self.global_map = self.vf.get_parent_map(self.keys) nonlocal_keys = self.nonlocal_keys missing_keys = set(nonlocal_keys) # Read from remote versioned file instances and provide to our caller. 
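# Illustrative sketch (not part of the upstream module): the on-disk
# framing produced by _record_to_data() above is a gzipped block of
# 'version <id> <nlines> <sha1>', the text lines, then 'end <id>'.
# Stdlib gzip stands in for tuned_gzip on the write side; the read side
# mirrors _parse_record_unchecked().
import gzip
from cStringIO import StringIO

def _sketch_frame_record(version_id, lines, digest):
    sio = StringIO()
    gz = gzip.GzipFile(mode='wb', fileobj=sio)
    gz.write("version %s %d %s\n" % (version_id, len(lines), digest))
    gz.write(''.join(lines))
    gz.write("end %s\n" % version_id)
    gz.close()
    return sio.getvalue()

def _sketch_unframe_record(data):
    content = gzip.GzipFile(mode='rb', fileobj=StringIO(data)).readlines()
    header, last_line = content.pop(0), content.pop()
    _, version_id, nlines, digest = header.split()
    return version_id, digest, content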
for source in self.vf._immediate_fallback_vfs: if not missing_keys: break # Loop over fallback repositories asking them for texts - ignore # any missing from a particular fallback. for record in source.get_record_stream(missing_keys, self._ordering, True): if record.storage_kind == 'absent': # Not in thie particular stream, may be in one of the # other fallback vfs objects. continue missing_keys.remove(record.key) yield record if self._raw_record_map is None: raise AssertionError('_raw_record_map should have been filled') first = True for key in self.keys: if key in self.nonlocal_keys: continue yield LazyKnitContentFactory(key, self.global_map[key], self, first) first = False def _get_one_work(self, requested_key): # Now, if we have calculated everything already, just return the # desired text. if requested_key in self._contents_map: return self._contents_map[requested_key] # To simplify things, parse everything at once - code that wants one text # probably wants them all. # FUTURE: This function could be improved for the 'extract many' case # by tracking each component and only doing the copy when the number of # children than need to apply delta's to it is > 1 or it is part of the # final output. multiple_versions = len(self.keys) != 1 if self._record_map is None: self._record_map = self.vf._raw_map_to_record_map( self._raw_record_map) record_map = self._record_map # raw_record_map is key: # Have read and parsed records at this point. for key in self.keys: if key in self.nonlocal_keys: # already handled continue components = [] cursor = key while cursor is not None: try: record, record_details, digest, next = record_map[cursor] except KeyError: raise RevisionNotPresent(cursor, self) components.append((cursor, record, record_details, digest)) cursor = next if cursor in self._contents_map: # no need to plan further back components.append((cursor, None, None, None)) break content = None for (component_id, record, record_details, digest) in reversed(components): if component_id in self._contents_map: content = self._contents_map[component_id] else: content, delta = self._factory.parse_record(key[-1], record, record_details, content, copy_base_content=multiple_versions) if multiple_versions: self._contents_map[component_id] = content # digest here is the digest from the last applied component. text = content.text() actual_sha = sha_strings(text) if actual_sha != digest: raise SHA1KnitCorrupt(self, actual_sha, digest, key, text) if multiple_versions: return self._contents_map[requested_key] else: return content def _wire_bytes(self): """Get the bytes to put on the wire for 'key'. The first collection of bytes asked for returns the serialised raw_record_map and the additional details (key, parent) for key. Subsequent calls return just the additional details (key, parent). The wire storage_kind given for the first key is 'knit-delta-closure', For subsequent keys it is 'knit-delta-closure-ref'. :param key: A key from the content generator. :return: Bytes to put on the wire. 
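# --- Illustrative sketch (not part of bzrlib): the component walk done by
# _get_one_work above, reduced to its shape.  record_map and apply_delta are
# hypothetical stand-ins for the factory's parse_record machinery: each entry
# is (kind, payload, sha1_hex, next_key), deltas are applied oldest-first and
# the final text is checked against the stored digest.
import hashlib

def rebuild_from_chain_sketch(key, record_map, apply_delta):
    components = []
    cursor = key
    while cursor is not None:
        kind, payload, digest, next_key = record_map[cursor]
        components.append((kind, payload, digest))
        cursor = next_key
    lines = None
    for kind, payload, digest in reversed(components):
        if kind == 'fulltext':
            lines = list(payload)
        else:
            lines = apply_delta(lines, payload)
    # the digest of the requested key is the first one collected
    if hashlib.sha1(b''.join(lines)).hexdigest() != components[0][2]:
        raise ValueError('sha1 mismatch for %r' % (key,))
    return lines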
""" lines = [] # kind marker for dispatch on the far side, lines.append('knit-delta-closure') # Annotated or not if self.vf._factory.annotated: lines.append('annotated') else: lines.append('') # then the list of keys lines.append('\t'.join(['\x00'.join(key) for key in self.keys if key not in self.nonlocal_keys])) # then the _raw_record_map in serialised form: map_byte_list = [] # for each item in the map: # 1 line with key # 1 line with parents if the key is to be yielded (None: for None, '' for ()) # one line with method # one line with noeol # one line with next ('' for None) # one line with byte count of the record bytes # the record bytes for key, (record_bytes, (method, noeol), next) in \ self._raw_record_map.iteritems(): key_bytes = '\x00'.join(key) parents = self.global_map.get(key, None) if parents is None: parent_bytes = 'None:' else: parent_bytes = '\t'.join('\x00'.join(key) for key in parents) method_bytes = method if noeol: noeol_bytes = "T" else: noeol_bytes = "F" if next: next_bytes = '\x00'.join(next) else: next_bytes = '' map_byte_list.append('%s\n%s\n%s\n%s\n%s\n%d\n%s' % ( key_bytes, parent_bytes, method_bytes, noeol_bytes, next_bytes, len(record_bytes), record_bytes)) map_bytes = ''.join(map_byte_list) lines.append(map_bytes) bytes = '\n'.join(lines) return bytes class _VFContentMapGenerator(_ContentMapGenerator): """Content map generator reading from a VersionedFiles object.""" def __init__(self, versioned_files, keys, nonlocal_keys=None, global_map=None, raw_record_map=None, ordering='unordered'): """Create a _ContentMapGenerator. :param versioned_files: The versioned files that the texts are being extracted from. :param keys: The keys to produce content maps for. :param nonlocal_keys: An iterable of keys(possibly intersecting keys) which are known to not be in this knit, but rather in one of the fallback knits. :param global_map: The result of get_parent_map(keys) (or a supermap). This is required if get_record_stream() is to be used. :param raw_record_map: A unparsed raw record map to use for answering contents. """ _ContentMapGenerator.__init__(self, ordering=ordering) # The vf to source data from self.vf = versioned_files # The keys desired self.keys = list(keys) # Keys known to be in fallback vfs objects if nonlocal_keys is None: self.nonlocal_keys = set() else: self.nonlocal_keys = frozenset(nonlocal_keys) # Parents data for keys to be returned in get_record_stream self.global_map = global_map # The chunked lists for self.keys in text form self._text_map = {} # A cache of KnitContent objects used in extracting texts. self._contents_map = {} # All the knit records needed to assemble the requested keys as full # texts. 
self._record_map = None if raw_record_map is None: self._raw_record_map = self.vf._get_record_map_unparsed(keys, allow_missing=True) else: self._raw_record_map = raw_record_map # the factory for parsing records self._factory = self.vf._factory class _NetworkContentMapGenerator(_ContentMapGenerator): """Content map generator sourced from a network stream.""" def __init__(self, bytes, line_end): """Construct a _NetworkContentMapGenerator from a bytes block.""" self._bytes = bytes self.global_map = {} self._raw_record_map = {} self._contents_map = {} self._record_map = None self.nonlocal_keys = [] # Get access to record parsing facilities self.vf = KnitVersionedFiles(None, None) start = line_end # Annotated or not line_end = bytes.find('\n', start) line = bytes[start:line_end] start = line_end + 1 if line == 'annotated': self._factory = KnitAnnotateFactory() else: self._factory = KnitPlainFactory() # list of keys to emit in get_record_stream line_end = bytes.find('\n', start) line = bytes[start:line_end] start = line_end + 1 self.keys = [ tuple(segment.split('\x00')) for segment in line.split('\t') if segment] # now a loop until the end. XXX: It would be nice if this was just a # bunch of the same records as get_record_stream(..., False) gives, but # there is a decent sized gap stopping that at the moment. end = len(bytes) while start < end: # 1 line with key line_end = bytes.find('\n', start) key = tuple(bytes[start:line_end].split('\x00')) start = line_end + 1 # 1 line with parents (None: for None, '' for ()) line_end = bytes.find('\n', start) line = bytes[start:line_end] if line == 'None:': parents = None else: parents = tuple( [tuple(segment.split('\x00')) for segment in line.split('\t') if segment]) self.global_map[key] = parents start = line_end + 1 # one line with method line_end = bytes.find('\n', start) line = bytes[start:line_end] method = line start = line_end + 1 # one line with noeol line_end = bytes.find('\n', start) line = bytes[start:line_end] noeol = line == "T" start = line_end + 1 # one line with next ('' for None) line_end = bytes.find('\n', start) line = bytes[start:line_end] if not line: next = None else: next = tuple(bytes[start:line_end].split('\x00')) start = line_end + 1 # one line with byte count of the record bytes line_end = bytes.find('\n', start) line = bytes[start:line_end] count = int(line) start = line_end + 1 # the record bytes record_bytes = bytes[start:start+count] start = start + count # put it in the map self._raw_record_map[key] = (record_bytes, (method, noeol), next) def get_record_stream(self): """Get a record stream for for keys requested by the bytestream.""" first = True for key in self.keys: yield LazyKnitContentFactory(key, self.global_map[key], self, first) first = False def _wire_bytes(self): return self._bytes class _KndxIndex(object): """Manages knit index files The index is kept in memory and read on startup, to enable fast lookups of revision information. The cursor of the index file is always pointing to the end, making it easy to append entries. _cache is a cache for fast mapping from version id to a Index object. _history is a cache for fast mapping from indexes to version ids. The index data format is dictionary compressed when it comes to parent references; a index entry may only have parents that with a lover index number. As a result, the index is topological sorted. 
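# --- Illustrative sketch (not part of bzrlib): the counted-bytes walk that
# _NetworkContentMapGenerator performs over the wire layout documented in
# _wire_bytes above (key, parents, method, noeol flag, next, byte count,
# record bytes).  Parent lines are skipped here for brevity; the helper name
# is hypothetical.
def parse_record_map_sketch(data):
    record_map = {}
    start, end = 0, len(data)
    while start < end:
        line_end = data.find(b'\n', start)
        key = tuple(data[start:line_end].split(b'\x00'))
        start = line_end + 1
        start = data.find(b'\n', start) + 1          # parents line (ignored)
        line_end = data.find(b'\n', start)
        method = data[start:line_end]
        start = line_end + 1
        line_end = data.find(b'\n', start)
        noeol = data[start:line_end] == b'T'
        start = line_end + 1
        line_end = data.find(b'\n', start)
        next_line = data[start:line_end]
        next_key = tuple(next_line.split(b'\x00')) if next_line else None
        start = line_end + 1
        line_end = data.find(b'\n', start)
        count = int(data[start:line_end].decode('ascii'))
        start = line_end + 1
        record_map[key] = (data[start:start + count], (method, noeol), next_key)
        start += count
    return record_map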
Duplicate entries may be written to the index for a single version id if this is done then the latter one completely replaces the former: this allows updates to correct version and parent information. Note that the two entries may share the delta, and that successive annotations and references MUST point to the first entry. The index file on disc contains a header, followed by one line per knit record. The same revision can be present in an index file more than once. The first occurrence gets assigned a sequence number starting from 0. The format of a single line is REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n REVISION_ID is a utf8-encoded revision id FLAGS is a comma separated list of flags about the record. Values include no-eol, line-delta, fulltext. BYTE_OFFSET is the ascii representation of the byte offset in the data file that the compressed data starts at. LENGTH is the ascii representation of the length of the data file. PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of REVISION_ID. PARENT_SEQUENCE_ID the ascii representation of the sequence number of a revision id already in the knit that is a parent of REVISION_ID. The ' :' marker is the end of record marker. partial writes: when a write is interrupted to the index file, it will result in a line that does not end in ' :'. If the ' :' is not present at the end of a line, or at the end of the file, then the record that is missing it will be ignored by the parser. When writing new records to the index file, the data is preceded by '\n' to ensure that records always start on new lines even if the last write was interrupted. As a result its normal for the last line in the index to be missing a trailing newline. One can be added with no harmful effects. :ivar _kndx_cache: dict from prefix to the old state of KnitIndex objects, where prefix is e.g. the (fileid,) for .texts instances or () for constant-mapped things like .revisions, and the old state is tuple(history_vector, cache_dict). This is used to prevent having an ABI change with the C extension that reads .kndx files. """ HEADER = "# bzr knit index 8\n" def __init__(self, transport, mapper, get_scope, allow_writes, is_locked): """Create a _KndxIndex on transport using mapper.""" self._transport = transport self._mapper = mapper self._get_scope = get_scope self._allow_writes = allow_writes self._is_locked = is_locked self._reset_cache() self.has_graph = True def add_records(self, records, random_id=False, missing_compression_parents=False): """Add multiple records to the index. :param records: a list of tuples: (key, options, access_memo, parents). :param random_id: If True the ids being added were randomly generated and no check for existence will be performed. :param missing_compression_parents: If True the records being added are only compressed against texts already in the index (or inside records). If False the records all refer to unavailable texts (or texts inside records) as compression parents. """ if missing_compression_parents: # It might be nice to get the edge of the records. But keys isn't # _wrong_. 
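# --- Illustrative sketch (not part of bzrlib): formatting one .kndx index
# line in the layout documented above.  parents is a list of either
# ('ref', sequence_number) for a revision already in the index or
# ('new', revision_id) for one that must be spelled out with a '.' prefix;
# the helper name and that tagging scheme are hypothetical.
def format_kndx_line_sketch(revision_id, flags, offset, length, parents):
    parts = [revision_id, ','.join(flags), str(offset), str(length)]
    for kind, value in parents:
        if kind == 'ref':
            parts.append(str(value))
        else:
            parts.append('.' + value)
    return ' '.join(parts) + ' :\n'

# format_kndx_line_sketch('rev-2', ['line-delta'], 130, 75, [('ref', 0)])
#   -> 'rev-2 line-delta 130 75 0 :\n'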
keys = sorted(record[0] for record in records) raise errors.RevisionNotPresent(keys, self) paths = {} for record in records: key = record[0] prefix = key[:-1] path = self._mapper.map(key) + '.kndx' path_keys = paths.setdefault(path, (prefix, [])) path_keys[1].append(record) for path in sorted(paths): prefix, path_keys = paths[path] self._load_prefixes([prefix]) lines = [] orig_history = self._kndx_cache[prefix][1][:] orig_cache = self._kndx_cache[prefix][0].copy() try: for key, options, (_, pos, size), parents in path_keys: if parents is None: # kndx indices cannot be parentless. parents = () line = "\n%s %s %s %s %s :" % ( key[-1], ','.join(options), pos, size, self._dictionary_compress(parents)) if type(line) is not str: raise AssertionError( 'data must be utf8 was %s' % type(line)) lines.append(line) self._cache_key(key, options, pos, size, parents) if len(orig_history): self._transport.append_bytes(path, ''.join(lines)) else: self._init_index(path, lines) except: # If any problems happen, restore the original values and re-raise self._kndx_cache[prefix] = (orig_cache, orig_history) raise def scan_unvalidated_index(self, graph_index): """See _KnitGraphIndex.scan_unvalidated_index.""" # Because kndx files do not support atomic insertion via separate index # files, they do not support this method. raise NotImplementedError(self.scan_unvalidated_index) def get_missing_compression_parents(self): """See _KnitGraphIndex.get_missing_compression_parents.""" # Because kndx files do not support atomic insertion via separate index # files, they do not support this method. raise NotImplementedError(self.get_missing_compression_parents) def _cache_key(self, key, options, pos, size, parent_keys): """Cache a version record in the history array and index cache. This is inlined into _load_data for performance. KEEP IN SYNC. (It saves 60ms, 25% of the __init__ overhead on local 4000 record indexes). """ prefix = key[:-1] version_id = key[-1] # last-element only for compatibilty with the C load_data. parents = tuple(parent[-1] for parent in parent_keys) for parent in parent_keys: if parent[:-1] != prefix: raise ValueError("mismatched prefixes for %r, %r" % ( key, parent_keys)) cache, history = self._kndx_cache[prefix] # only want the _history index to reference the 1st index entry # for version_id if version_id not in cache: index = len(history) history.append(version_id) else: index = cache[version_id][5] cache[version_id] = (version_id, options, pos, size, parents, index) def check_header(self, fp): line = fp.readline() if line == '': # An empty file can actually be treated as though the file doesn't # exist yet. raise errors.NoSuchFile(self) if line != self.HEADER: raise KnitHeaderError(badline=line, filename=self) def _check_read(self): if not self._is_locked(): raise errors.ObjectNotLocked(self) if self._get_scope() != self._scope: self._reset_cache() def _check_write_ok(self): """Assert if not writes are permitted.""" if not self._is_locked(): raise errors.ObjectNotLocked(self) if self._get_scope() != self._scope: self._reset_cache() if self._mode != 'w': raise errors.ReadOnlyObjectDirtiedError(self) def get_build_details(self, keys): """Get the method, index_memo and compression parent for keys. Ghosts are omitted from the result. :param keys: An iterable of keys. :return: A dict of key:(index_memo, compression_parent, parents, record_details). 
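# --- Illustrative sketch (not part of bzrlib): the snapshot-and-restore idiom
# used by add_records above, with hypothetical names.  The in-memory cache is
# copied before the append; if the write fails for any reason the original
# state is put back so the cache never disagrees with the file on disk.
def append_with_rollback_sketch(cache, history, new_entries, write_out):
    orig_cache = cache.copy()
    orig_history = history[:]
    try:
        for key, value in new_entries:
            cache[key] = value
            history.append(key)
        write_out(new_entries)
    except:
        cache.clear()
        cache.update(orig_cache)
        history[:] = orig_history
        raise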
index_memo opaque structure to pass to read_records to extract the raw data compression_parent Content that this record is built upon, may be None parents Logical parents of this node record_details extra information about the content which needs to be passed to Factory.parse_record """ parent_map = self.get_parent_map(keys) result = {} for key in keys: if key not in parent_map: continue # Ghost method = self.get_method(key) parents = parent_map[key] if method == 'fulltext': compression_parent = None else: compression_parent = parents[0] noeol = 'no-eol' in self.get_options(key) index_memo = self.get_position(key) result[key] = (index_memo, compression_parent, parents, (method, noeol)) return result def get_method(self, key): """Return compression method of specified key.""" options = self.get_options(key) if 'fulltext' in options: return 'fulltext' elif 'line-delta' in options: return 'line-delta' else: raise errors.KnitIndexUnknownMethod(self, options) def get_options(self, key): """Return a list representing options. e.g. ['foo', 'bar'] """ prefix, suffix = self._split_key(key) self._load_prefixes([prefix]) try: return self._kndx_cache[prefix][0][suffix][1] except KeyError: raise RevisionNotPresent(key, self) def find_ancestry(self, keys): """See CombinedGraphIndex.find_ancestry()""" prefixes = set(key[:-1] for key in keys) self._load_prefixes(prefixes) result = {} parent_map = {} missing_keys = set() pending_keys = list(keys) # This assumes that keys will not reference parents in a different # prefix, which is accurate so far. while pending_keys: key = pending_keys.pop() if key in parent_map: continue prefix = key[:-1] try: suffix_parents = self._kndx_cache[prefix][0][key[-1]][4] except KeyError: missing_keys.add(key) else: parent_keys = tuple([prefix + (suffix,) for suffix in suffix_parents]) parent_map[key] = parent_keys pending_keys.extend([p for p in parent_keys if p not in parent_map]) return parent_map, missing_keys def get_parent_map(self, keys): """Get a map of the parents of keys. :param keys: The keys to look up parents for. :return: A mapping from keys to parents. Absent keys are absent from the mapping. """ # Parse what we need to up front, this potentially trades off I/O # locality (.kndx and .knit in the same block group for the same file # id) for less checking in inner loops. prefixes = set(key[:-1] for key in keys) self._load_prefixes(prefixes) result = {} for key in keys: prefix = key[:-1] try: suffix_parents = self._kndx_cache[prefix][0][key[-1]][4] except KeyError: pass else: result[key] = tuple(prefix + (suffix,) for suffix in suffix_parents) return result def get_position(self, key): """Return details needed to access the version. :return: a tuple (key, data position, size) to hand to the access logic to get the record. """ prefix, suffix = self._split_key(key) self._load_prefixes([prefix]) entry = self._kndx_cache[prefix][0][suffix] return key, entry[2], entry[3] has_key = _mod_index._has_key_from_parent_map def _init_index(self, path, extra_lines=[]): """Initialize an index.""" sio = StringIO() sio.write(self.HEADER) sio.writelines(extra_lines) sio.seek(0) self._transport.put_file_non_atomic(path, sio, create_parent_dir=True) # self._create_parent_dir) # mode=self._file_mode, # dir_mode=self._dir_mode) def keys(self): """Get all the keys in the collection. The keys are not ordered. """ result = set() # Identify all key prefixes. # XXX: A bit hacky, needs polish. 
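# --- Illustrative sketch (not part of bzrlib): the worklist walk performed by
# find_ancestry above.  get_parents is a hypothetical callable returning a
# tuple of parent keys, or raising KeyError for a key the index does not have.
def find_ancestry_sketch(keys, get_parents):
    parent_map = {}
    missing_keys = set()
    pending = list(keys)
    while pending:
        key = pending.pop()
        if key in parent_map:
            continue
        try:
            parents = get_parents(key)
        except KeyError:
            missing_keys.add(key)
        else:
            parent_map[key] = parents
            pending.extend(p for p in parents if p not in parent_map)
    return parent_map, missing_keys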
if type(self._mapper) is ConstantMapper: prefixes = [()] else: relpaths = set() for quoted_relpath in self._transport.iter_files_recursive(): path, ext = os.path.splitext(quoted_relpath) relpaths.add(path) prefixes = [self._mapper.unmap(path) for path in relpaths] self._load_prefixes(prefixes) for prefix in prefixes: for suffix in self._kndx_cache[prefix][1]: result.add(prefix + (suffix,)) return result def _load_prefixes(self, prefixes): """Load the indices for prefixes.""" self._check_read() for prefix in prefixes: if prefix not in self._kndx_cache: # the load_data interface writes to these variables. self._cache = {} self._history = [] self._filename = prefix try: path = self._mapper.map(prefix) + '.kndx' fp = self._transport.get(path) try: # _load_data may raise NoSuchFile if the target knit is # completely empty. _load_data(self, fp) finally: fp.close() self._kndx_cache[prefix] = (self._cache, self._history) del self._cache del self._filename del self._history except NoSuchFile: self._kndx_cache[prefix] = ({}, []) if type(self._mapper) is ConstantMapper: # preserve behaviour for revisions.kndx etc. self._init_index(path) del self._cache del self._filename del self._history missing_keys = _mod_index._missing_keys_from_parent_map def _partition_keys(self, keys): """Turn keys into a dict of prefix:suffix_list.""" result = {} for key in keys: prefix_keys = result.setdefault(key[:-1], []) prefix_keys.append(key[-1]) return result def _dictionary_compress(self, keys): """Dictionary compress keys. :param keys: The keys to generate references to. :return: A string representation of keys. keys which are present are dictionary compressed, and others are emitted as fulltext with a '.' prefix. """ if not keys: return '' result_list = [] prefix = keys[0][:-1] cache = self._kndx_cache[prefix][0] for key in keys: if key[:-1] != prefix: # kndx indices cannot refer across partitioned storage. raise ValueError("mismatched prefixes for %r" % keys) if key[-1] in cache: # -- inlined lookup() -- result_list.append(str(cache[key[-1]][5])) # -- end lookup () -- else: result_list.append('.' + key[-1]) return ' '.join(result_list) def _reset_cache(self): # Possibly this should be a LRU cache. A dictionary from key_prefix to # (cache_dict, history_vector) for parsed kndx files. self._kndx_cache = {} self._scope = self._get_scope() allow_writes = self._allow_writes() if allow_writes: self._mode = 'w' else: self._mode = 'r' def _sort_keys_by_io(self, keys, positions): """Figure out an optimal order to read the records for the given keys. Sort keys, grouped by index and sorted by position. :param keys: A list of keys whose records we want to read. This will be sorted 'in-place'. :param positions: A dict, such as the one returned by _get_components_positions() :return: None """ def get_sort_key(key): index_memo = positions[key][1] # Group by prefix and position. 
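# --- Illustrative sketch (not part of bzrlib): the dictionary compression
# that _dictionary_compress above applies to parent references.  Suffixes
# already in the index are written as their sequence number; anything else is
# emitted literally with a '.' prefix.  Names are hypothetical.
def dictionary_compress_sketch(suffixes, sequence_numbers):
    out = []
    for suffix in suffixes:
        if suffix in sequence_numbers:
            out.append(str(sequence_numbers[suffix]))
        else:
            out.append('.' + suffix)
    return ' '.join(out)

# dictionary_compress_sketch(['rev-1', 'rev-9'], {'rev-1': 0}) -> '0 .rev-9'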
index_memo[0] is the key, so it is # (file_id, revision_id) and we don't want to sort on revision_id, # index_memo[1] is the position, and index_memo[2] is the size, # which doesn't matter for the sort return index_memo[0][:-1], index_memo[1] return keys.sort(key=get_sort_key) _get_total_build_size = _get_total_build_size def _split_key(self, key): """Split key into a prefix and suffix.""" return key[:-1], key[-1] class _KnitGraphIndex(object): """A KnitVersionedFiles index layered on GraphIndex.""" def __init__(self, graph_index, is_locked, deltas=False, parents=True, add_callback=None, track_external_parent_refs=False): """Construct a KnitGraphIndex on a graph_index. :param graph_index: An implementation of bzrlib.index.GraphIndex. :param is_locked: A callback to check whether the object should answer queries. :param deltas: Allow delta-compressed records. :param parents: If True, record knits parents, if not do not record parents. :param add_callback: If not None, allow additions to the index and call this callback with a list of added GraphIndex nodes: [(node, value, node_refs), ...] :param is_locked: A callback, returns True if the index is locked and thus usable. :param track_external_parent_refs: If True, record all external parent references parents from added records. These can be retrieved later by calling get_missing_parents(). """ self._add_callback = add_callback self._graph_index = graph_index self._deltas = deltas self._parents = parents if deltas and not parents: # XXX: TODO: Delta tree and parent graph should be conceptually # separate. raise KnitCorrupt(self, "Cannot do delta compression without " "parent tracking.") self.has_graph = parents self._is_locked = is_locked self._missing_compression_parents = set() if track_external_parent_refs: self._key_dependencies = _KeyRefs() else: self._key_dependencies = None def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self._graph_index) def add_records(self, records, random_id=False, missing_compression_parents=False): """Add multiple records to the index. This function does not insert data into the Immutable GraphIndex backing the KnitGraphIndex, instead it prepares data for insertion by the caller and checks that it is safe to insert then calls self._add_callback with the prepared GraphIndex nodes. :param records: a list of tuples: (key, options, access_memo, parents). :param random_id: If True the ids being added were randomly generated and no check for existence will be performed. :param missing_compression_parents: If True the records being added are only compressed against texts already in the index (or inside records). If False the records all refer to unavailable texts (or texts inside records) as compression parents. """ if not self._add_callback: raise errors.ReadOnlyError(self) # we hope there are no repositories with inconsistent parentage # anymore. 
keys = {} compression_parents = set() key_dependencies = self._key_dependencies for (key, options, access_memo, parents) in records: if self._parents: parents = tuple(parents) if key_dependencies is not None: key_dependencies.add_references(key, parents) index, pos, size = access_memo if 'no-eol' in options: value = 'N' else: value = ' ' value += "%d %d" % (pos, size) if not self._deltas: if 'line-delta' in options: raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit") if self._parents: if self._deltas: if 'line-delta' in options: node_refs = (parents, (parents[0],)) if missing_compression_parents: compression_parents.add(parents[0]) else: node_refs = (parents, ()) else: node_refs = (parents, ) else: if parents: raise KnitCorrupt(self, "attempt to add node with parents " "in parentless index.") node_refs = () keys[key] = (value, node_refs) # check for dups if not random_id: present_nodes = self._get_entries(keys) for (index, key, value, node_refs) in present_nodes: parents = node_refs[:1] # Sometimes these are passed as a list rather than a tuple passed = static_tuple.as_tuples(keys[key]) passed_parents = passed[1][:1] if (value[0] != keys[key][0][0] or parents != passed_parents): node_refs = static_tuple.as_tuples(node_refs) raise KnitCorrupt(self, "inconsistent details in add_records" ": %s %s" % ((value, node_refs), passed)) del keys[key] result = [] if self._parents: for key, (value, node_refs) in keys.iteritems(): result.append((key, value, node_refs)) else: for key, (value, node_refs) in keys.iteritems(): result.append((key, value)) self._add_callback(result) if missing_compression_parents: # This may appear to be incorrect (it does not check for # compression parents that are in the existing graph index), # but such records won't have been buffered, so this is # actually correct: every entry when # missing_compression_parents==True either has a missing parent, or # a parent that is one of the keys in records. compression_parents.difference_update(keys) self._missing_compression_parents.update(compression_parents) # Adding records may have satisfied missing compression parents. self._missing_compression_parents.difference_update(keys) def scan_unvalidated_index(self, graph_index): """Inform this _KnitGraphIndex that there is an unvalidated index. This allows this _KnitGraphIndex to keep track of any missing compression parents we may want to have filled in to make those indices valid. :param graph_index: A GraphIndex """ if self._deltas: new_missing = graph_index.external_references(ref_list_num=1) new_missing.difference_update(self.get_parent_map(new_missing)) self._missing_compression_parents.update(new_missing) if self._key_dependencies is not None: # Add parent refs from graph_index (and discard parent refs that # the graph_index has). for node in graph_index.iter_all_entries(): self._key_dependencies.add_references(node[1], node[3][0]) def get_missing_compression_parents(self): """Return the keys of missing compression parents. Missing compression parents occur when a record stream was missing basis texts, or a index was scanned that had missing basis texts. """ return frozenset(self._missing_compression_parents) def get_missing_parents(self): """Return the keys of missing parents.""" # If updating this, you should also update # groupcompress._GCGraphIndex.get_missing_parents # We may have false positives, so filter those out. 
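# --- Illustrative sketch (not part of bzrlib): the GraphIndex value string
# built by add_records above is a one-byte no-eol flag followed by
# 'position size'; _node_to_position later splits it back apart.  The helper
# names are hypothetical.
def encode_index_value_sketch(pos, size, no_eol):
    return ('N' if no_eol else ' ') + '%d %d' % (pos, size)

def decode_index_value_sketch(value):
    pos, size = value[1:].split(' ')
    return value[0] == 'N', int(pos), int(size)

# encode_index_value_sketch(130, 75, False)  -> ' 130 75'
# decode_index_value_sketch(' 130 75')       -> (False, 130, 75)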
self._key_dependencies.satisfy_refs_for_keys( self.get_parent_map(self._key_dependencies.get_unsatisfied_refs())) return frozenset(self._key_dependencies.get_unsatisfied_refs()) def _check_read(self): """raise if reads are not permitted.""" if not self._is_locked(): raise errors.ObjectNotLocked(self) def _check_write_ok(self): """Assert if writes are not permitted.""" if not self._is_locked(): raise errors.ObjectNotLocked(self) def _compression_parent(self, an_entry): # return the key that an_entry is compressed against, or None # Grab the second parent list (as deltas implies parents currently) compression_parents = an_entry[3][1] if not compression_parents: return None if len(compression_parents) != 1: raise AssertionError( "Too many compression parents: %r" % compression_parents) return compression_parents[0] def get_build_details(self, keys): """Get the method, index_memo and compression parent for version_ids. Ghosts are omitted from the result. :param keys: An iterable of keys. :return: A dict of key: (index_memo, compression_parent, parents, record_details). index_memo opaque structure to pass to read_records to extract the raw data compression_parent Content that this record is built upon, may be None parents Logical parents of this node record_details extra information about the content which needs to be passed to Factory.parse_record """ self._check_read() result = {} entries = self._get_entries(keys, False) for entry in entries: key = entry[1] if not self._parents: parents = () else: parents = entry[3][0] if not self._deltas: compression_parent_key = None else: compression_parent_key = self._compression_parent(entry) noeol = (entry[2][0] == 'N') if compression_parent_key: method = 'line-delta' else: method = 'fulltext' result[key] = (self._node_to_position(entry), compression_parent_key, parents, (method, noeol)) return result def _get_entries(self, keys, check_present=False): """Get the entries for keys. :param keys: An iterable of index key tuples. """ keys = set(keys) found_keys = set() if self._parents: for node in self._graph_index.iter_entries(keys): yield node found_keys.add(node[1]) else: # adapt parentless index to the rest of the code. for node in self._graph_index.iter_entries(keys): yield node[0], node[1], node[2], () found_keys.add(node[1]) if check_present: missing_keys = keys.difference(found_keys) if missing_keys: raise RevisionNotPresent(missing_keys.pop(), self) def get_method(self, key): """Return compression method of specified key.""" return self._get_method(self._get_node(key)) def _get_method(self, node): if not self._deltas: return 'fulltext' if self._compression_parent(node): return 'line-delta' else: return 'fulltext' def _get_node(self, key): try: return list(self._get_entries([key]))[0] except IndexError: raise RevisionNotPresent(key, self) def get_options(self, key): """Return a list representing options. e.g. ['foo', 'bar'] """ node = self._get_node(key) options = [self._get_method(node)] if node[2][0] == 'N': options.append('no-eol') return options def find_ancestry(self, keys): """See CombinedGraphIndex.find_ancestry()""" return self._graph_index.find_ancestry(keys, 0) def get_parent_map(self, keys): """Get a map of the parents of keys. :param keys: The keys to look up parents for. :return: A mapping from keys to parents. Absent keys are absent from the mapping. 
""" self._check_read() nodes = self._get_entries(keys) result = {} if self._parents: for node in nodes: result[node[1]] = node[3][0] else: for node in nodes: result[node[1]] = None return result def get_position(self, key): """Return details needed to access the version. :return: a tuple (index, data position, size) to hand to the access logic to get the record. """ node = self._get_node(key) return self._node_to_position(node) has_key = _mod_index._has_key_from_parent_map def keys(self): """Get all the keys in the collection. The keys are not ordered. """ self._check_read() return [node[1] for node in self._graph_index.iter_all_entries()] missing_keys = _mod_index._missing_keys_from_parent_map def _node_to_position(self, node): """Convert an index value to position details.""" bits = node[2][1:].split(' ') return node[0], int(bits[0]), int(bits[1]) def _sort_keys_by_io(self, keys, positions): """Figure out an optimal order to read the records for the given keys. Sort keys, grouped by index and sorted by position. :param keys: A list of keys whose records we want to read. This will be sorted 'in-place'. :param positions: A dict, such as the one returned by _get_components_positions() :return: None """ def get_index_memo(key): # index_memo is at offset [1]. It is made up of (GraphIndex, # position, size). GI is an object, which will be unique for each # pack file. This causes us to group by pack file, then sort by # position. Size doesn't matter, but it isn't worth breaking up the # tuple. return positions[key][1] return keys.sort(key=get_index_memo) _get_total_build_size = _get_total_build_size class _KnitKeyAccess(object): """Access to records in .knit files.""" def __init__(self, transport, mapper): """Create a _KnitKeyAccess with transport and mapper. :param transport: The transport the access object is rooted at. :param mapper: The mapper used to map keys to .knit files. """ self._transport = transport self._mapper = mapper def add_raw_records(self, key_sizes, raw_data): """Add raw knit bytes to a storage area. The data is spooled to the container writer in one bytes-record per raw data item. :param sizes: An iterable of tuples containing the key and size of each raw data segment. :param raw_data: A bytestring containing the data. :return: A list of memos to retrieve the record later. Each memo is an opaque index memo. For _KnitKeyAccess the memo is (key, pos, length), where the key is the record key. """ if type(raw_data) is not str: raise AssertionError( 'data must be plain bytes was %s' % type(raw_data)) result = [] offset = 0 # TODO: This can be tuned for writing to sftp and other servers where # append() is relatively expensive by grouping the writes to each key # prefix. for key, size in key_sizes: path = self._mapper.map(key) try: base = self._transport.append_bytes(path + '.knit', raw_data[offset:offset+size]) except errors.NoSuchFile: self._transport.mkdir(osutils.dirname(path)) base = self._transport.append_bytes(path + '.knit', raw_data[offset:offset+size]) # if base == 0: # chmod. offset += size result.append((key, base, size)) return result def flush(self): """Flush pending writes on this access object. For .knit files this is a no-op. """ pass def get_raw_records(self, memos_for_retrieval): """Get the raw bytes for a records. :param memos_for_retrieval: An iterable containing the access memo for retrieving the bytes. :return: An iterator over the bytes of the records. """ # first pass, group into same-index request to minimise readv's issued. 
request_lists = [] current_prefix = None for (key, offset, length) in memos_for_retrieval: if current_prefix == key[:-1]: current_list.append((offset, length)) else: if current_prefix is not None: request_lists.append((current_prefix, current_list)) current_prefix = key[:-1] current_list = [(offset, length)] # handle the last entry if current_prefix is not None: request_lists.append((current_prefix, current_list)) for prefix, read_vector in request_lists: path = self._mapper.map(prefix) + '.knit' for pos, data in self._transport.readv(path, read_vector): yield data def annotate_knit(knit, revision_id): """Annotate a knit with no cached annotations. This implementation is for knits with no cached annotations. It will work for knits with cached annotations, but this is not recommended. """ annotator = _KnitAnnotator(knit) return iter(annotator.annotate_flat(revision_id)) class _KnitAnnotator(annotate.Annotator): """Build up the annotations for a text.""" def __init__(self, vf): annotate.Annotator.__init__(self, vf) # TODO: handle Nodes which cannot be extracted # self._ghosts = set() # Map from (key, parent_key) => matching_blocks, should be 'use once' self._matching_blocks = {} # KnitContent objects self._content_objects = {} # The number of children that depend on this fulltext content object self._num_compression_children = {} # Delta records that need their compression parent before they can be # expanded self._pending_deltas = {} # Fulltext records that are waiting for their parents fulltexts before # they can be yielded for annotation self._pending_annotation = {} self._all_build_details = {} def _get_build_graph(self, key): """Get the graphs for building texts and annotations. The data you need for creating a full text may be different than the data you need to annotate that text. (At a minimum, you need both parents to create an annotation, but only need 1 parent to generate the fulltext.) :return: A list of (key, index_memo) records, suitable for passing to read_records_iter to start reading in the raw data from the pack file. """ pending = set([key]) records = [] ann_keys = set() self._num_needed_children[key] = 1 while pending: # get all pending nodes this_iteration = pending build_details = self._vf._index.get_build_details(this_iteration) self._all_build_details.update(build_details) # new_nodes = self._vf._index._get_entries(this_iteration) pending = set() for key, details in build_details.iteritems(): (index_memo, compression_parent, parent_keys, record_details) = details self._parent_map[key] = parent_keys self._heads_provider = None records.append((key, index_memo)) # Do we actually need to check _annotated_lines? 
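# --- Illustrative sketch (not part of bzrlib): the grouping performed by
# get_raw_records above, which batches consecutive memos sharing a key prefix
# so that each .knit file is read with a single readv().  itertools.groupby
# only merges adjacent runs, matching the hand-rolled loop.
from itertools import groupby

def group_read_requests_sketch(memos_for_retrieval):
    request_lists = []
    for prefix, group in groupby(memos_for_retrieval, key=lambda m: m[0][:-1]):
        request_lists.append(
            (prefix, [(offset, length) for _, offset, length in group]))
    return request_lists

# group_read_requests_sketch([(('f', 'a'), 0, 10), (('f', 'b'), 10, 4),
#                             (('g', 'c'), 0, 7)])
#   -> [(('f',), [(0, 10), (10, 4)]), (('g',), [(0, 7)])]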
pending.update([p for p in parent_keys if p not in self._all_build_details]) if parent_keys: for parent_key in parent_keys: if parent_key in self._num_needed_children: self._num_needed_children[parent_key] += 1 else: self._num_needed_children[parent_key] = 1 if compression_parent: if compression_parent in self._num_compression_children: self._num_compression_children[compression_parent] += 1 else: self._num_compression_children[compression_parent] = 1 missing_versions = this_iteration.difference(build_details.keys()) if missing_versions: for key in missing_versions: if key in self._parent_map and key in self._text_cache: # We already have this text ready, we just need to # yield it later so we get it annotated ann_keys.add(key) parent_keys = self._parent_map[key] for parent_key in parent_keys: if parent_key in self._num_needed_children: self._num_needed_children[parent_key] += 1 else: self._num_needed_children[parent_key] = 1 pending.update([p for p in parent_keys if p not in self._all_build_details]) else: raise errors.RevisionNotPresent(key, self._vf) # Generally we will want to read the records in reverse order, because # we find the parent nodes after the children records.reverse() return records, ann_keys def _get_needed_texts(self, key, pb=None): # if True or len(self._vf._immediate_fallback_vfs) > 0: if len(self._vf._immediate_fallback_vfs) > 0: # If we have fallbacks, go to the generic path for v in annotate.Annotator._get_needed_texts(self, key, pb=pb): yield v return while True: try: records, ann_keys = self._get_build_graph(key) for idx, (sub_key, text, num_lines) in enumerate( self._extract_texts(records)): if pb is not None: pb.update(gettext('annotating'), idx, len(records)) yield sub_key, text, num_lines for sub_key in ann_keys: text = self._text_cache[sub_key] num_lines = len(text) # bad assumption yield sub_key, text, num_lines return except errors.RetryWithNewPacks, e: self._vf._access.reload_or_raise(e) # The cached build_details are no longer valid self._all_build_details.clear() def _cache_delta_blocks(self, key, compression_parent, delta, lines): parent_lines = self._text_cache[compression_parent] blocks = list(KnitContent.get_line_delta_blocks(delta, parent_lines, lines)) self._matching_blocks[(key, compression_parent)] = blocks def _expand_record(self, key, parent_keys, compression_parent, record, record_details): delta = None if compression_parent: if compression_parent not in self._content_objects: # Waiting for the parent self._pending_deltas.setdefault(compression_parent, []).append( (key, parent_keys, record, record_details)) return None # We have the basis parent, so expand the delta num = self._num_compression_children[compression_parent] num -= 1 if num == 0: base_content = self._content_objects.pop(compression_parent) self._num_compression_children.pop(compression_parent) else: self._num_compression_children[compression_parent] = num base_content = self._content_objects[compression_parent] # It is tempting to want to copy_base_content=False for the last # child object. However, whenever noeol=False, # self._text_cache[parent_key] is content._lines. So mutating it # gives very bad results. # The alternative is to copy the lines into text cache, but then we # are copying anyway, so just do it here. 
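# --- Illustrative sketch (not part of bzrlib): the reference counting that
# _expand_record above applies to compression parents.  The last child to use
# a cached base content takes ownership of it; earlier children leave it in
# place for their siblings.  Names are hypothetical.
def take_base_content_sketch(content_objects, num_children, parent_key):
    remaining = num_children[parent_key] - 1
    if remaining == 0:
        num_children.pop(parent_key)
        return content_objects.pop(parent_key)
    num_children[parent_key] = remaining
    return content_objects[parent_key]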
content, delta = self._vf._factory.parse_record( key, record, record_details, base_content, copy_base_content=True) else: # Fulltext record content, _ = self._vf._factory.parse_record( key, record, record_details, None) if self._num_compression_children.get(key, 0) > 0: self._content_objects[key] = content lines = content.text() self._text_cache[key] = lines if delta is not None: self._cache_delta_blocks(key, compression_parent, delta, lines) return lines def _get_parent_annotations_and_matches(self, key, text, parent_key): """Get the list of annotations for the parent, and the matching lines. :param text: The opaque value given by _get_needed_texts :param parent_key: The key for the parent text :return: (parent_annotations, matching_blocks) parent_annotations is a list as long as the number of lines in parent matching_blocks is a list of (parent_idx, text_idx, len) tuples indicating which lines match between the two texts """ block_key = (key, parent_key) if block_key in self._matching_blocks: blocks = self._matching_blocks.pop(block_key) parent_annotations = self._annotations_cache[parent_key] return parent_annotations, blocks return annotate.Annotator._get_parent_annotations_and_matches(self, key, text, parent_key) def _process_pending(self, key): """The content for 'key' was just processed. Determine if there is any more pending work to be processed. """ to_return = [] if key in self._pending_deltas: compression_parent = key children = self._pending_deltas.pop(key) for child_key, parent_keys, record, record_details in children: lines = self._expand_record(child_key, parent_keys, compression_parent, record, record_details) if self._check_ready_for_annotations(child_key, parent_keys): to_return.append(child_key) # Also check any children that are waiting for this parent to be # annotation ready if key in self._pending_annotation: children = self._pending_annotation.pop(key) to_return.extend([c for c, p_keys in children if self._check_ready_for_annotations(c, p_keys)]) return to_return def _check_ready_for_annotations(self, key, parent_keys): """return true if this text is ready to be yielded. Otherwise, this will return False, and queue the text into self._pending_annotation """ for parent_key in parent_keys: if parent_key not in self._annotations_cache: # still waiting on at least one parent text, so queue it up # Note that if there are multiple parents, we need to wait # for all of them. self._pending_annotation.setdefault(parent_key, []).append((key, parent_keys)) return False return True def _extract_texts(self, records): """Extract the various texts needed based on records""" # We iterate in the order read, rather than a strict order requested # However, process what we can, and put off to the side things that # still need parents, cleaning them up when those parents are # processed. # Basic data flow: # 1) As 'records' are read, see if we can expand these records into # Content objects (and thus lines) # 2) If a given line-delta is waiting on its compression parent, it # gets queued up into self._pending_deltas, otherwise we expand # it, and put it into self._text_cache and self._content_objects # 3) If we expanded the text, we will then check to see if all # parents have also been processed. If so, this text gets yielded, # else this record gets set aside into pending_annotation # 4) Further, if we expanded the text in (2), we will then check to # see if there are any children in self._pending_deltas waiting to # also be processed. 
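# --- Illustrative sketch (not part of bzrlib): the readiness test used by
# _check_ready_for_annotations above.  A text is only yielded once every
# parent has been annotated; otherwise it is queued against the first parent
# that is still outstanding.  Argument names are hypothetical.
def check_ready_sketch(key, parent_keys, annotated, pending_annotation):
    for parent_key in parent_keys:
        if parent_key not in annotated:
            pending_annotation.setdefault(parent_key, []).append(
                (key, parent_keys))
            return False
    return True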
If so, we go back to (2) for those # 5) Further again, if we yielded the text, we can then check if that # 'unlocks' any of the texts in pending_annotations, which should # then get yielded as well # Note that both steps 4 and 5 are 'recursive' in that unlocking one # compression child could unlock yet another, and yielding a fulltext # will also 'unlock' the children that are waiting on that annotation. # (Though also, unlocking 1 parent's fulltext, does not unlock a child # if other parents are also waiting.) # We want to yield content before expanding child content objects, so # that we know when we can re-use the content lines, and the annotation # code can know when it can stop caching fulltexts, as well. # Children that are missing their compression parent pending_deltas = {} for (key, record, digest) in self._vf._read_records_iter(records): # ghosts? details = self._all_build_details[key] (_, compression_parent, parent_keys, record_details) = details lines = self._expand_record(key, parent_keys, compression_parent, record, record_details) if lines is None: # Pending delta should be queued up continue # At this point, we may be able to yield this content, if all # parents are also finished yield_this_text = self._check_ready_for_annotations(key, parent_keys) if yield_this_text: # All parents present yield key, lines, len(lines) to_process = self._process_pending(key) while to_process: this_process = to_process to_process = [] for key in this_process: lines = self._text_cache[key] yield key, lines, len(lines) to_process.extend(self._process_pending(key)) try: from bzrlib._knit_load_data_pyx import _load_data_c as _load_data except ImportError, e: osutils.failed_to_load_extension(e) from bzrlib._knit_load_data_py import _load_data_py as _load_data bzr-2.7.0/bzrlib/lazy_import.py0000644000000000000000000004012411701271114014646 0ustar 00000000000000# Copyright (C) 2006-2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Functionality to create lazy evaluation objects. This includes waiting to import a module until it is actually used. Most commonly, the 'lazy_import' function is used to import other modules in an on-demand fashion. Typically use looks like:: from bzrlib.lazy_import import lazy_import lazy_import(globals(), ''' from bzrlib import ( errors, osutils, branch, ) import bzrlib.branch ''') Then 'errors, osutils, branch' and 'bzrlib' will exist as lazy-loaded objects which will be replaced with a real object on first use. In general, it is best to only load modules in this way. This is because it isn't safe to pass these variables to other functions before they have been replaced. This is especially true for constants, sometimes true for classes or functions (when used as a factory, or you want to inherit from them). 
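# --- Illustrative sketch (not part of bzrlib): the optional-extension import
# pattern used just above for _load_data.  The compiled module name and the
# fallback body are hypothetical; the point is that an ImportError simply
# selects the pure-Python implementation.
try:
    from _example_speedups import parse_lines as _parse_lines_sketch
except ImportError:
    def _parse_lines_sketch(data):
        # pure-Python fallback: split into lines, keeping the line endings
        return data.splitlines(True)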
""" from __future__ import absolute_import class ScopeReplacer(object): """A lazy object that will replace itself in the appropriate scope. This object sits, ready to create the real object the first time it is needed. """ __slots__ = ('_scope', '_factory', '_name', '_real_obj') # If you to do x = y, setting this to False will disallow access to # members from the second variable (i.e. x). This should normally # be enabled for reasons of thread safety and documentation, but # will be disabled during the selftest command to check for abuse. _should_proxy = True def __init__(self, scope, factory, name): """Create a temporary object in the specified scope. Once used, a real object will be placed in the scope. :param scope: The scope the object should appear in :param factory: A callable that will create the real object. It will be passed (self, scope, name) :param name: The variable name in the given scope. """ object.__setattr__(self, '_scope', scope) object.__setattr__(self, '_factory', factory) object.__setattr__(self, '_name', name) object.__setattr__(self, '_real_obj', None) scope[name] = self def _resolve(self): """Return the real object for which this is a placeholder""" name = object.__getattribute__(self, '_name') real_obj = object.__getattribute__(self, '_real_obj') if real_obj is None: # No obj generated previously, so generate from factory and scope. factory = object.__getattribute__(self, '_factory') scope = object.__getattribute__(self, '_scope') obj = factory(self, scope, name) if obj is self: raise errors.IllegalUseOfScopeReplacer(name, msg="Object tried" " to replace itself, check it's not using its own scope.") # Check if another thread has jumped in while obj was generated. real_obj = object.__getattribute__(self, '_real_obj') if real_obj is None: # Still no prexisting obj, so go ahead and assign to scope and # return. There is still a small window here where races will # not be detected, but safest to avoid additional locking. object.__setattr__(self, '_real_obj', obj) scope[name] = obj return obj # Raise if proxying is disabled as obj has already been generated. if not ScopeReplacer._should_proxy: raise errors.IllegalUseOfScopeReplacer( name, msg="Object already replaced, did you assign it" " to another variable?") return real_obj def __getattribute__(self, attr): obj = object.__getattribute__(self, '_resolve')() return getattr(obj, attr) def __setattr__(self, attr, value): obj = object.__getattribute__(self, '_resolve')() return setattr(obj, attr, value) def __call__(self, *args, **kwargs): obj = object.__getattribute__(self, '_resolve')() return obj(*args, **kwargs) def disallow_proxying(): """Disallow lazily imported modules to be used as proxies. Calling this function might cause problems with concurrent imports in multithreaded environments, but will help detecting wasteful indirection, so it should be called when executing unit tests. Only lazy imports that happen after this call are affected. """ ScopeReplacer._should_proxy = False class ImportReplacer(ScopeReplacer): """This is designed to replace only a portion of an import list. It will replace itself with a module, and then make children entries also ImportReplacer objects. At present, this only supports 'import foo.bar.baz' syntax. """ # '_import_replacer_children' is intentionally a long semi-unique name # that won't likely exist elsewhere. 
This allows us to detect an # ImportReplacer object by using # object.__getattribute__(obj, '_import_replacer_children') # We can't just use 'isinstance(obj, ImportReplacer)', because that # accesses .__class__, which goes through __getattribute__, and triggers # the replacement. __slots__ = ('_import_replacer_children', '_member', '_module_path') def __init__(self, scope, name, module_path, member=None, children={}): """Upon request import 'module_path' as the name 'module_name'. When imported, prepare children to also be imported. :param scope: The scope that objects should be imported into. Typically this is globals() :param name: The variable name. Often this is the same as the module_path. 'bzrlib' :param module_path: A list for the fully specified module path ['bzrlib', 'foo', 'bar'] :param member: The member inside the module to import, often this is None, indicating the module is being imported. :param children: Children entries to be imported later. This should be a map of children specifications. :: {'foo':(['bzrlib', 'foo'], None, {'bar':(['bzrlib', 'foo', 'bar'], None {})}) } Examples:: import foo => name='foo' module_path='foo', member=None, children={} import foo.bar => name='foo' module_path='foo', member=None, children={'bar':(['foo', 'bar'], None, {}} from foo import bar => name='bar' module_path='foo', member='bar' children={} from foo import bar, baz would get translated into 2 import requests. On for 'name=bar' and one for 'name=baz' """ if (member is not None) and children: raise ValueError('Cannot supply both a member and children') object.__setattr__(self, '_import_replacer_children', children) object.__setattr__(self, '_member', member) object.__setattr__(self, '_module_path', module_path) # Indirecting through __class__ so that children can # override _import (especially our instrumented version) cls = object.__getattribute__(self, '__class__') ScopeReplacer.__init__(self, scope=scope, name=name, factory=cls._import) def _import(self, scope, name): children = object.__getattribute__(self, '_import_replacer_children') member = object.__getattribute__(self, '_member') module_path = object.__getattribute__(self, '_module_path') module_python_path = '.'.join(module_path) if member is not None: module = __import__(module_python_path, scope, scope, [member], level=0) return getattr(module, member) else: module = __import__(module_python_path, scope, scope, [], level=0) for path in module_path[1:]: module = getattr(module, path) # Prepare the children to be imported for child_name, (child_path, child_member, grandchildren) in \ children.iteritems(): # Using self.__class__, so that children get children classes # instantiated. (This helps with instrumented tests) cls = object.__getattribute__(self, '__class__') cls(module.__dict__, name=child_name, module_path=child_path, member=child_member, children=grandchildren) return module class ImportProcessor(object): """Convert text that users input into lazy import requests""" # TODO: jam 20060912 This class is probably not strict enough about # what type of text it allows. For example, you can do: # import (foo, bar), which is not allowed by python. # For now, it should be supporting a superset of python import # syntax which is all we really care about. 
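# --- Illustrative sketch (not part of bzrlib): a much-simplified take on the
# ScopeReplacer/ImportReplacer machinery above.  The class, its usage and the
# reduced behaviour (no thread-safety checks, no children, no member imports)
# are hypothetical.
class LazyModuleSketch(object):
    """Hold a dotted module path, import it on first attribute access, and
    replace the bound name in the scope so later lookups skip the proxy."""
    def __init__(self, scope, name, module_path):
        self._scope = scope
        self._name = name
        self._module_path = module_path
        scope[name] = self

    def __getattr__(self, attr):
        # __import__ of a dotted path returns the top-level package, so walk
        # getattr down to the leaf module, then swap it into the scope.
        module = __import__('.'.join(self._module_path), {}, {}, [], level=0)
        for part in self._module_path[1:]:
            module = getattr(module, part)
        self._scope[self._name] = module
        return getattr(module, attr)

# LazyModuleSketch(globals(), 'osp', ['os', 'path'])
# osp.join('a', 'b')   # first attribute access triggers the real import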
__slots__ = ['imports', '_lazy_import_class'] def __init__(self, lazy_import_class=None): self.imports = {} if lazy_import_class is None: self._lazy_import_class = ImportReplacer else: self._lazy_import_class = lazy_import_class def lazy_import(self, scope, text): """Convert the given text into a bunch of lazy import objects. This takes a text string, which should be similar to normal python import markup. """ self._build_map(text) self._convert_imports(scope) def _convert_imports(self, scope): # Now convert the map into a set of imports for name, info in self.imports.iteritems(): self._lazy_import_class(scope, name=name, module_path=info[0], member=info[1], children=info[2]) def _build_map(self, text): """Take a string describing imports, and build up the internal map""" for line in self._canonicalize_import_text(text): if line.startswith('import '): self._convert_import_str(line) elif line.startswith('from '): self._convert_from_str(line) else: raise errors.InvalidImportLine(line, "doesn't start with 'import ' or 'from '") def _convert_import_str(self, import_str): """This converts a import string into an import map. This only understands 'import foo, foo.bar, foo.bar.baz as bing' :param import_str: The import string to process """ if not import_str.startswith('import '): raise ValueError('bad import string %r' % (import_str,)) import_str = import_str[len('import '):] for path in import_str.split(','): path = path.strip() if not path: continue as_hunks = path.split(' as ') if len(as_hunks) == 2: # We have 'as' so this is a different style of import # 'import foo.bar.baz as bing' creates a local variable # named 'bing' which points to 'foo.bar.baz' name = as_hunks[1].strip() module_path = as_hunks[0].strip().split('.') if name in self.imports: raise errors.ImportNameCollision(name) # No children available in 'import foo as bar' self.imports[name] = (module_path, None, {}) else: # Now we need to handle module_path = path.split('.') name = module_path[0] if name not in self.imports: # This is a new import that we haven't seen before module_def = ([name], None, {}) self.imports[name] = module_def else: module_def = self.imports[name] cur_path = [name] cur = module_def[2] for child in module_path[1:]: cur_path.append(child) if child in cur: cur = cur[child][2] else: next = (cur_path[:], None, {}) cur[child] = next cur = next[2] def _convert_from_str(self, from_str): """This converts a 'from foo import bar' string into an import map. :param from_str: The import string to process """ if not from_str.startswith('from '): raise ValueError('bad from/import %r' % from_str) from_str = from_str[len('from '):] from_module, import_list = from_str.split(' import ') from_module_path = from_module.split('.') for path in import_list.split(','): path = path.strip() if not path: continue as_hunks = path.split(' as ') if len(as_hunks) == 2: # We have 'as' so this is a different style of import # 'import foo.bar.baz as bing' creates a local variable # named 'bing' which points to 'foo.bar.baz' name = as_hunks[1].strip() module = as_hunks[0].strip() else: name = module = path if name in self.imports: raise errors.ImportNameCollision(name) self.imports[name] = (from_module_path, module, {}) def _canonicalize_import_text(self, text): """Take a list of imports, and split it into regularized form. This is meant to take regular import text, and convert it to the forms that the rest of the converters prefer. 
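# --- Illustrative sketch (not part of bzrlib): a much-reduced version of the
# string handling in _convert_import_str/_convert_from_str above, mapping one
# single-target import line to (bound_name, module_path, member).  Comma
# lists and nested children, which the real code supports, are omitted.
def parse_import_line_sketch(line):
    if line.startswith('from '):
        module, target = line[len('from '):].split(' import ')
        member = name = target.strip()
        if ' as ' in member:
            member, name = [p.strip() for p in member.split(' as ')]
        return name, module.strip().split('.'), member
    target = line[len('import '):].strip()
    if ' as ' in target:
        target, name = [p.strip() for p in target.split(' as ')]
        return name, target.split('.'), None
    path = target.split('.')
    return path[0], path, None   # 'import foo.bar' binds the name 'foo'

# parse_import_line_sketch('from bzrlib import errors as e')
#   -> ('e', ['bzrlib'], 'errors')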
""" out = [] cur = None continuing = False for line in text.split('\n'): line = line.strip() loc = line.find('#') if loc != -1: line = line[:loc].strip() if not line: continue if cur is not None: if line.endswith(')'): out.append(cur + ' ' + line[:-1]) cur = None else: cur += ' ' + line else: if '(' in line and ')' not in line: cur = line.replace('(', '') else: out.append(line.replace('(', '').replace(')', '')) if cur is not None: raise errors.InvalidImportLine(cur, 'Unmatched parenthesis') return out def lazy_import(scope, text, lazy_import_class=None): """Create lazy imports for all of the imports in text. This is typically used as something like:: from bzrlib.lazy_import import lazy_import lazy_import(globals(), ''' from bzrlib import ( foo, bar, baz, ) import bzrlib.branch import bzrlib.transport ''') Then 'foo, bar, baz' and 'bzrlib' will exist as lazy-loaded objects which will be replaced with a real object on first use. In general, it is best to only load modules in this way. This is because other objects (functions/classes/variables) are frequently used without accessing a member, which means we cannot tell they have been used. """ # This is just a helper around ImportProcessor.lazy_import proc = ImportProcessor(lazy_import_class=lazy_import_class) return proc.lazy_import(scope, text) # The only module that this module depends on is 'bzrlib.errors'. But it # can actually be imported lazily, since we only need it if there is a # problem. lazy_import(globals(), """ from bzrlib import errors """) bzr-2.7.0/bzrlib/lazy_regex.py0000644000000000000000000001116211673635356014472 0ustar 00000000000000# Copyright (C) 2006 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Lazily compiled regex objects. This module defines a class which creates proxy objects for regex compilation. This allows overriding re.compile() to return lazily compiled objects. We do this rather than just providing a new interface so that it will also be used by existing Python modules that create regexs. """ from __future__ import absolute_import import re from bzrlib import errors class LazyRegex(object): """A proxy around a real regex, which won't be compiled until accessed.""" # These are the parameters on a real _sre.SRE_Pattern object, which we # will map to local members so that we don't have the proxy overhead. _regex_attributes_to_copy = [ '__copy__', '__deepcopy__', 'findall', 'finditer', 'match', 'scanner', 'search', 'split', 'sub', 'subn' ] # We use slots to keep the overhead low. 
But we need a slot entry for # all of the attributes we will copy __slots__ = ['_real_regex', '_regex_args', '_regex_kwargs', ] + _regex_attributes_to_copy def __init__(self, args=(), kwargs={}): """Create a new proxy object, passing in the args to pass to re.compile :param args: The `*args` to pass to re.compile :param kwargs: The `**kwargs` to pass to re.compile """ self._real_regex = None self._regex_args = args self._regex_kwargs = kwargs def _compile_and_collapse(self): """Actually compile the requested regex""" self._real_regex = self._real_re_compile(*self._regex_args, **self._regex_kwargs) for attr in self._regex_attributes_to_copy: setattr(self, attr, getattr(self._real_regex, attr)) def _real_re_compile(self, *args, **kwargs): """Thunk over to the original re.compile""" try: return _real_re_compile(*args, **kwargs) except re.error, e: # raise InvalidPattern instead of re.error as this gives a # cleaner message to the user. raise errors.InvalidPattern('"' + args[0] + '" ' +str(e)) def __getstate__(self): """Return the state to use when pickling.""" return { "args": self._regex_args, "kwargs": self._regex_kwargs, } def __setstate__(self, dict): """Restore from a pickled state.""" self._real_regex = None setattr(self, "_regex_args", dict["args"]) setattr(self, "_regex_kwargs", dict["kwargs"]) def __getattr__(self, attr): """Return a member from the proxied regex object. If the regex hasn't been compiled yet, compile it """ if self._real_regex is None: self._compile_and_collapse() # Once we have compiled, the only time we should come here # is actually if the attribute is missing. return getattr(self._real_regex, attr) def lazy_compile(*args, **kwargs): """Create a proxy object which will compile the regex on demand. :return: a LazyRegex proxy object. """ return LazyRegex(args, kwargs) def install_lazy_compile(): """Make lazy_compile the default compile mode for regex compilation. This overrides re.compile with lazy_compile. To restore the original functionality, call reset_compile(). """ re.compile = lazy_compile def reset_compile(): """Restore the original function to re.compile(). It is safe to call reset_compile() multiple times, it will always restore re.compile() to the value that existed at import time. Though the first call will reset back to the original (it doesn't track nesting level) """ re.compile = _real_re_compile _real_re_compile = re.compile if _real_re_compile is lazy_compile: raise AssertionError( "re.compile has already been overridden as lazy_compile, but this would" \ " cause infinite recursion") bzr-2.7.0/bzrlib/library_state.py0000644000000000000000000001055412005721672015154 0ustar 00000000000000# Copyright (C) 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """The core state needed to make use of bzr is managed here.""" from __future__ import absolute_import __all__ = [ 'BzrLibraryState', ] import bzrlib from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ from bzrlib import ( cleanup, config, osutils, symbol_versioning, trace, ui, ) """) class BzrLibraryState(object): """The state about how bzrlib has been configured. This is the core state needed to make use of bzr. The current instance is currently always exposed as bzrlib.global_state, but we desired to move to a point where no global state is needed at all. :ivar saved_state: The bzrlib.global_state at the time __enter__ was called. :ivar cleanups: An ObjectWithCleanups which can be used for cleanups that should occur when the use of bzrlib is completed. This is initialised in __enter__ and executed in __exit__. """ def __init__(self, ui, trace): """Create library start for normal use of bzrlib. Most applications that embed bzrlib, including bzr itself, should just call bzrlib.initialize(), but it is possible to use the state class directly. The initialize() function provides sensible defaults for a CLI program, such as a text UI factory. More options may be added in future so callers should use named arguments. BzrLibraryState implements the Python 2.5 Context Manager protocol PEP343, and can be used with the with statement. Upon __enter__ the global variables in use by bzr are set, and they are cleared on __exit__. :param ui: A bzrlib.ui.ui_factory to use. :param trace: A bzrlib.trace.Config context manager to use, perhaps bzrlib.trace.DefaultConfig. """ self._ui = ui self._trace = trace # There is no overrides by default, they are set later when the command # arguments are parsed. self.cmdline_overrides = config.CommandLineStore() # No config stores are cached to start with self.config_stores = {} # By url self.started = False def __enter__(self): if not self.started: self._start() return self # This is bound to the 'as' clause in a with statement. def _start(self): """Do all initialization.""" # NB: This function tweaks so much global state it's hard to test it in # isolation within the same interpreter. It's not reached on normal # in-process run_bzr calls. If it's broken, we expect that # TestRunBzrSubprocess may fail. self.cleanups = cleanup.ObjectWithCleanups() if bzrlib.version_info[3] == 'final': self.cleanups.add_cleanup( symbol_versioning.suppress_deprecation_warnings(override=True)) self._trace.__enter__() self._orig_ui = bzrlib.ui.ui_factory bzrlib.ui.ui_factory = self._ui self._ui.__enter__() self.saved_state = bzrlib.global_state bzrlib.global_state = self self.started = True def __exit__(self, exc_type, exc_val, exc_tb): if exc_type is None: # Save config changes for k, store in self.config_stores.iteritems(): store.save_changes() self.cleanups.cleanup_now() trace._flush_stdout_stderr() trace._flush_trace() osutils.report_extension_load_failures() self._ui.__exit__(None, None, None) self._trace.__exit__(None, None, None) ui.ui_factory = self._orig_ui bzrlib.global_state = self.saved_state return False # propogate exceptions. 
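
# A minimal usage sketch (an illustration, not part of the original module).
# Embedding applications are normally expected to go through
# bzrlib.initialize(), which builds a BzrLibraryState with sensible defaults
# and is assumed here to be usable as a context manager:
#
#     import bzrlib
#
#     with bzrlib.initialize():
#         # bzrlib.global_state, the ui factory and tracing are set up here;
#         # call bzrlib APIs inside this block.
#         pass
#
# Constructing BzrLibraryState directly with an explicit ui factory and trace
# configuration is the lower-level equivalent described in __init__ above.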
bzr-2.7.0/bzrlib/lock.py0000644000000000000000000004630312000073721013227 0ustar 00000000000000# Copyright (C) 2005-2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Locking using OS file locks or file existence. Note: This method of locking is generally deprecated in favour of LockDir, but is used to lock local WorkingTrees, and by some old formats. It's accessed through Transport.lock_read(), etc. This module causes two methods, lock() and unlock() to be defined in any way that works on the current platform. It is not specified whether these locks are reentrant (i.e. can be taken repeatedly by a single process) or whether they exclude different threads in a single process. That reentrancy is provided by LockableFiles. This defines two classes: ReadLock and WriteLock, which can be implemented in different ways on different platforms. Both have an unlock() method. """ from __future__ import absolute_import import contextlib import errno import os import sys import warnings from bzrlib import ( debug, errors, osutils, trace, ) from bzrlib.hooks import Hooks from bzrlib.i18n import gettext class LockHooks(Hooks): def __init__(self): Hooks.__init__(self, "bzrlib.lock", "Lock.hooks") self.add_hook('lock_acquired', "Called with a bzrlib.lock.LockResult when a physical lock is " "acquired.", (1, 8)) self.add_hook('lock_released', "Called with a bzrlib.lock.LockResult when a physical lock is " "released.", (1, 8)) self.add_hook('lock_broken', "Called with a bzrlib.lock.LockResult when a physical lock is " "broken.", (1, 15)) class Lock(object): """Base class for locks. :cvar hooks: Hook dictionary for operations on locks. """ hooks = LockHooks() class LockResult(object): """Result of an operation on a lock; passed to a hook""" def __init__(self, lock_url, details=None): """Create a lock result for lock with optional details about the lock.""" self.lock_url = lock_url self.details = details def __eq__(self, other): return self.lock_url == other.lock_url and self.details == other.details def __repr__(self): return '%s(%s, %s)' % (self.__class__.__name__, self.lock_url, self.details) class LogicalLockResult(object): """The result of a lock_read/lock_write/lock_tree_write call on lockables. :ivar unlock: A callable which will unlock the lock. """ def __init__(self, unlock): self.unlock = unlock def __repr__(self): return "LogicalLockResult(%s)" % (self.unlock) def cant_unlock_not_held(locked_object): """An attempt to unlock failed because the object was not locked. This provides a policy point from which we can generate either a warning or an exception. """ # This is typically masking some other error and called from a finally # block, so it's useful to have the option not to generate a new error # here. You can use -Werror to make it fatal. It should possibly also # raise LockNotHeld. 
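    # Illustration (an assumption about the command-line wiring, not taken
    # from this file): running a command with the 'unlock' debug flag, e.g.
    #
    #     bzr -Dunlock commit -m "message"
    #
    # is expected to make the check below emit a warning instead of raising
    # LockNotHeld.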
if 'unlock' in debug.debug_flags: warnings.warn("%r is already unlocked" % (locked_object,), stacklevel=3) else: raise errors.LockNotHeld(locked_object) try: import fcntl have_fcntl = True except ImportError: have_fcntl = False have_pywin32 = False have_ctypes_win32 = False if sys.platform == 'win32': import msvcrt try: import win32file, pywintypes, winerror have_pywin32 = True except ImportError: pass try: import ctypes have_ctypes_win32 = True except ImportError: pass class _OSLock(object): def __init__(self): self.f = None self.filename = None def _open(self, filename, filemode): self.filename = osutils.realpath(filename) try: self.f = open(self.filename, filemode) return self.f except IOError, e: if e.errno in (errno.EACCES, errno.EPERM): raise errors.LockFailed(self.filename, str(e)) if e.errno != errno.ENOENT: raise # maybe this is an old branch (before may 2005) trace.mutter("trying to create missing lock %r", self.filename) self.f = open(self.filename, 'wb+') return self.f def _clear_f(self): """Clear the self.f attribute cleanly.""" if self.f: self.f.close() self.f = None def unlock(self): raise NotImplementedError() _lock_classes = [] if have_fcntl: class _fcntl_FileLock(_OSLock): def _unlock(self): fcntl.lockf(self.f, fcntl.LOCK_UN) self._clear_f() class _fcntl_WriteLock(_fcntl_FileLock): _open_locks = set() def __init__(self, filename): super(_fcntl_WriteLock, self).__init__() # Check we can grab a lock before we actually open the file. self.filename = osutils.realpath(filename) if self.filename in _fcntl_WriteLock._open_locks: self._clear_f() raise errors.LockContention(self.filename) if self.filename in _fcntl_ReadLock._open_locks: if 'strict_locks' in debug.debug_flags: self._clear_f() raise errors.LockContention(self.filename) else: trace.mutter('Write lock taken w/ an open read lock on: %s' % (self.filename,)) self._open(self.filename, 'rb+') # reserve a slot for this lock - even if the lockf call fails, # at this point unlock() will be called, because self.f is set. # TODO: make this fully threadsafe, if we decide we care. _fcntl_WriteLock._open_locks.add(self.filename) try: # LOCK_NB will cause IOError to be raised if we can't grab a # lock right away. fcntl.lockf(self.f, fcntl.LOCK_EX | fcntl.LOCK_NB) except IOError, e: if e.errno in (errno.EAGAIN, errno.EACCES): # We couldn't grab the lock self.unlock() # we should be more precise about whats a locking # error and whats a random-other error raise errors.LockContention(self.filename, e) def unlock(self): _fcntl_WriteLock._open_locks.remove(self.filename) self._unlock() class _fcntl_ReadLock(_fcntl_FileLock): _open_locks = {} def __init__(self, filename): super(_fcntl_ReadLock, self).__init__() self.filename = osutils.realpath(filename) if self.filename in _fcntl_WriteLock._open_locks: if 'strict_locks' in debug.debug_flags: # We raise before calling _open so we don't need to # _clear_f raise errors.LockContention(self.filename) else: trace.mutter('Read lock taken w/ an open write lock on: %s' % (self.filename,)) _fcntl_ReadLock._open_locks.setdefault(self.filename, 0) _fcntl_ReadLock._open_locks[self.filename] += 1 self._open(filename, 'rb') try: # LOCK_NB will cause IOError to be raised if we can't grab a # lock right away. 
fcntl.lockf(self.f, fcntl.LOCK_SH | fcntl.LOCK_NB) except IOError, e: # we should be more precise about whats a locking # error and whats a random-other error raise errors.LockContention(self.filename, e) def unlock(self): count = _fcntl_ReadLock._open_locks[self.filename] if count == 1: del _fcntl_ReadLock._open_locks[self.filename] else: _fcntl_ReadLock._open_locks[self.filename] = count - 1 self._unlock() def temporary_write_lock(self): """Try to grab a write lock on the file. On platforms that support it, this will upgrade to a write lock without unlocking the file. Otherwise, this will release the read lock, and try to acquire a write lock. :return: A token which can be used to switch back to a read lock. """ if self.filename in _fcntl_WriteLock._open_locks: raise AssertionError('file already locked: %r' % (self.filename,)) try: wlock = _fcntl_TemporaryWriteLock(self) except errors.LockError: # We didn't unlock, so we can just return 'self' return False, self return True, wlock class _fcntl_TemporaryWriteLock(_OSLock): """A token used when grabbing a temporary_write_lock. Call restore_read_lock() when you are done with the write lock. """ def __init__(self, read_lock): super(_fcntl_TemporaryWriteLock, self).__init__() self._read_lock = read_lock self.filename = read_lock.filename count = _fcntl_ReadLock._open_locks[self.filename] if count > 1: # Something else also has a read-lock, so we cannot grab a # write lock. raise errors.LockContention(self.filename) if self.filename in _fcntl_WriteLock._open_locks: raise AssertionError('file already locked: %r' % (self.filename,)) # See if we can open the file for writing. Another process might # have a read lock. We don't use self._open() because we don't want # to create the file if it exists. That would have already been # done by _fcntl_ReadLock try: new_f = open(self.filename, 'rb+') except IOError, e: if e.errno in (errno.EACCES, errno.EPERM): raise errors.LockFailed(self.filename, str(e)) raise try: # LOCK_NB will cause IOError to be raised if we can't grab a # lock right away. fcntl.lockf(new_f, fcntl.LOCK_EX | fcntl.LOCK_NB) except IOError, e: # TODO: Raise a more specific error based on the type of error raise errors.LockContention(self.filename, e) _fcntl_WriteLock._open_locks.add(self.filename) self.f = new_f def restore_read_lock(self): """Restore the original ReadLock.""" # For fcntl, since we never released the read lock, just release the # write lock, and return the original lock. 
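            # Illustrative calling pattern for the read->write->read dance
            # (an assumption about typical use, not code from this module):
            #
            #     rlock = ReadLock('some-file')
            #     success, lock = rlock.temporary_write_lock()
            #     ...  # write through 'lock' only if success is True
            #     rlock = lock.restore_read_lock() if success else lock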
fcntl.lockf(self.f, fcntl.LOCK_UN) self._clear_f() _fcntl_WriteLock._open_locks.remove(self.filename) # Avoid reference cycles read_lock = self._read_lock self._read_lock = None return read_lock _lock_classes.append(('fcntl', _fcntl_WriteLock, _fcntl_ReadLock)) if have_pywin32 and sys.platform == 'win32': if os.path.supports_unicode_filenames: # for Windows NT/2K/XP/etc win32file_CreateFile = win32file.CreateFileW else: # for Windows 98 win32file_CreateFile = win32file.CreateFile class _w32c_FileLock(_OSLock): def _open(self, filename, access, share, cflags, pymode): self.filename = osutils.realpath(filename) try: self._handle = win32file_CreateFile(filename, access, share, None, win32file.OPEN_ALWAYS, win32file.FILE_ATTRIBUTE_NORMAL, None) except pywintypes.error, e: if e.args[0] == winerror.ERROR_ACCESS_DENIED: raise errors.LockFailed(filename, e) if e.args[0] == winerror.ERROR_SHARING_VIOLATION: raise errors.LockContention(filename, e) raise fd = win32file._open_osfhandle(self._handle, cflags) self.f = os.fdopen(fd, pymode) return self.f def unlock(self): self._clear_f() self._handle = None class _w32c_ReadLock(_w32c_FileLock): def __init__(self, filename): super(_w32c_ReadLock, self).__init__() self._open(filename, win32file.GENERIC_READ, win32file.FILE_SHARE_READ, os.O_RDONLY, "rb") def temporary_write_lock(self): """Try to grab a write lock on the file. On platforms that support it, this will upgrade to a write lock without unlocking the file. Otherwise, this will release the read lock, and try to acquire a write lock. :return: A token which can be used to switch back to a read lock. """ # I can't find a way to upgrade a read lock to a write lock without # unlocking first. So here, we do just that. self.unlock() try: wlock = _w32c_WriteLock(self.filename) except errors.LockError: return False, _w32c_ReadLock(self.filename) return True, wlock class _w32c_WriteLock(_w32c_FileLock): def __init__(self, filename): super(_w32c_WriteLock, self).__init__() self._open(filename, win32file.GENERIC_READ | win32file.GENERIC_WRITE, 0, os.O_RDWR, "rb+") def restore_read_lock(self): """Restore the original ReadLock.""" # For win32 we had to completely let go of the original lock, so we # just unlock and create a new read lock. 
self.unlock() return _w32c_ReadLock(self.filename) _lock_classes.append(('pywin32', _w32c_WriteLock, _w32c_ReadLock)) if have_ctypes_win32: from ctypes.wintypes import DWORD, LPCSTR, LPCWSTR LPSECURITY_ATTRIBUTES = ctypes.c_void_p # used as NULL no need to declare HANDLE = ctypes.c_int # rather than unsigned as in ctypes.wintypes if os.path.supports_unicode_filenames: _function_name = "CreateFileW" LPTSTR = LPCWSTR else: _function_name = "CreateFileA" class LPTSTR(LPCSTR): def __new__(cls, obj): return LPCSTR.__new__(cls, obj.encode("mbcs")) # CreateFile _CreateFile = ctypes.WINFUNCTYPE( HANDLE, # return value LPTSTR, # lpFileName DWORD, # dwDesiredAccess DWORD, # dwShareMode LPSECURITY_ATTRIBUTES, # lpSecurityAttributes DWORD, # dwCreationDisposition DWORD, # dwFlagsAndAttributes HANDLE # hTemplateFile )((_function_name, ctypes.windll.kernel32)) INVALID_HANDLE_VALUE = -1 GENERIC_READ = 0x80000000 GENERIC_WRITE = 0x40000000 FILE_SHARE_READ = 1 OPEN_ALWAYS = 4 FILE_ATTRIBUTE_NORMAL = 128 ERROR_ACCESS_DENIED = 5 ERROR_SHARING_VIOLATION = 32 class _ctypes_FileLock(_OSLock): def _open(self, filename, access, share, cflags, pymode): self.filename = osutils.realpath(filename) handle = _CreateFile(filename, access, share, None, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0) if handle in (INVALID_HANDLE_VALUE, 0): e = ctypes.WinError() if e.args[0] == ERROR_ACCESS_DENIED: raise errors.LockFailed(filename, e) if e.args[0] == ERROR_SHARING_VIOLATION: raise errors.LockContention(filename, e) raise e fd = msvcrt.open_osfhandle(handle, cflags) self.f = os.fdopen(fd, pymode) return self.f def unlock(self): self._clear_f() class _ctypes_ReadLock(_ctypes_FileLock): def __init__(self, filename): super(_ctypes_ReadLock, self).__init__() self._open(filename, GENERIC_READ, FILE_SHARE_READ, os.O_RDONLY, "rb") def temporary_write_lock(self): """Try to grab a write lock on the file. On platforms that support it, this will upgrade to a write lock without unlocking the file. Otherwise, this will release the read lock, and try to acquire a write lock. :return: A token which can be used to switch back to a read lock. """ # I can't find a way to upgrade a read lock to a write lock without # unlocking first. So here, we do just that. self.unlock() try: wlock = _ctypes_WriteLock(self.filename) except errors.LockError: return False, _ctypes_ReadLock(self.filename) return True, wlock class _ctypes_WriteLock(_ctypes_FileLock): def __init__(self, filename): super(_ctypes_WriteLock, self).__init__() self._open(filename, GENERIC_READ | GENERIC_WRITE, 0, os.O_RDWR, "rb+") def restore_read_lock(self): """Restore the original ReadLock.""" # For win32 we had to completely let go of the original lock, so we # just unlock and create a new read lock. self.unlock() return _ctypes_ReadLock(self.filename) _lock_classes.append(('ctypes', _ctypes_WriteLock, _ctypes_ReadLock)) if len(_lock_classes) == 0: raise NotImplementedError( "We must have one of fcntl, pywin32, or ctypes available" " to support OS locking." ) # We default to using the first available lock class. _lock_type, WriteLock, ReadLock = _lock_classes[0] class _RelockDebugMixin(object): """Mixin support for -Drelock flag. Add this as a base class then call self._note_lock with 'r' or 'w' when acquiring a read- or write-lock. If this object was previously locked (and locked the same way), and -Drelock is set, then this will trace.note a message about it. 
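
    For example (an illustrative sketch, not code from this module)::

        class MyLockable(_RelockDebugMixin, object):

            def lock_read(self):
                self._note_lock('r')
                # ... then take the real lock ...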
""" _prev_lock = None def _note_lock(self, lock_type): if 'relock' in debug.debug_flags and self._prev_lock == lock_type: if lock_type == 'r': type_name = 'read' else: type_name = 'write' trace.note(gettext('{0!r} was {1} locked again'), self, type_name) self._prev_lock = lock_type @contextlib.contextmanager def write_locked(lockable): lockable.lock_write() try: yield lockable finally: lockable.unlock() bzr-2.7.0/bzrlib/lockable_files.py0000644000000000000000000002512011673360271015245 0ustar 00000000000000# Copyright (C) 2005-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ import warnings from bzrlib import ( counted_lock, errors, lock, osutils, transactions, urlutils, ) """) from bzrlib.decorators import ( only_raises, ) class LockableFiles(object): """Object representing a set of related files locked within the same scope. This coordinates access to the lock along with providing a transaction. LockableFiles manage a lock count and can be locked repeatedly by a single caller. (The underlying lock implementation generally does not support this.) Instances of this class are often called control_files. This class is now deprecated; code should move to using the Transport directly for file operations and using the lock or CountedLock for locking. :ivar _lock: The real underlying lock (e.g. a LockDir) :ivar _lock_count: If _lock_mode is true, a positive count of the number of times the lock has been taken (and not yet released) *by this process*, through this particular object instance. :ivar _lock_mode: None, or 'r' or 'w' """ def __init__(self, transport, lock_name, lock_class): """Create a LockableFiles group :param transport: Transport pointing to the directory holding the control files and lock. :param lock_name: Name of the lock guarding these files. :param lock_class: Class of lock strategy to use: typically either LockDir or TransportLock. """ self._transport = transport self.lock_name = lock_name self._transaction = None self._lock_mode = None self._lock_count = 0 self._find_modes() esc_name = self._escape(lock_name) self._lock = lock_class(transport, esc_name, file_modebits=self._file_mode, dir_modebits=self._dir_mode) self._counted_lock = counted_lock.CountedLock(self._lock) def create_lock(self): """Create the lock. This should normally be called only when the LockableFiles directory is first created on disk. """ self._lock.create(mode=self._dir_mode) def __repr__(self): return '%s(%r)' % (self.__class__.__name__, self._transport) def __str__(self): return 'LockableFiles(%s, %s)' % (self.lock_name, self._transport.base) def break_lock(self): """Break the lock of this lockable files group if it is held. The current ui factory will be used to prompt for user conformation. 
""" self._lock.break_lock() def _escape(self, file_or_path): """DEPRECATED: Do not use outside this class""" if not isinstance(file_or_path, basestring): file_or_path = '/'.join(file_or_path) if file_or_path == '': return u'' return urlutils.escape(osutils.safe_unicode(file_or_path)) def _find_modes(self): """Determine the appropriate modes for files and directories. :deprecated: Replaced by BzrDir._find_creation_modes. """ # XXX: The properties created by this can be removed or deprecated # once all the _get_text_store methods etc no longer use them. # -- mbp 20080512 try: st = self._transport.stat('.') except errors.TransportNotPossible: self._dir_mode = 0755 self._file_mode = 0644 else: # Check the directory mode, but also make sure the created # directories and files are read-write for this user. This is # mostly a workaround for filesystems which lie about being able to # write to a directory (cygwin & win32) self._dir_mode = (st.st_mode & 07777) | 00700 # Remove the sticky and execute bits for files self._file_mode = self._dir_mode & ~07111 def leave_in_place(self): """Set this LockableFiles to not clear the physical lock on unlock.""" self._lock.leave_in_place() def dont_leave_in_place(self): """Set this LockableFiles to clear the physical lock on unlock.""" self._lock.dont_leave_in_place() def lock_write(self, token=None): """Lock this group of files for writing. :param token: if this is already locked, then lock_write will fail unless the token matches the existing lock. :returns: a token if this instance supports tokens, otherwise None. :raises TokenLockingNotSupported: when a token is given but this instance doesn't support using token locks. :raises MismatchedToken: if the specified token doesn't match the token of the existing lock. A token should be passed in if you know that you have locked the object some other way, and need to synchronise this object's state with that fact. """ if self._lock_mode: if (self._lock_mode != 'w' or not self.get_transaction().writeable()): raise errors.ReadOnlyError(self) self._lock.validate_token(token) self._lock_count += 1 return self._token_from_lock else: token_from_lock = self._lock.lock_write(token=token) #traceback.print_stack() self._lock_mode = 'w' self._lock_count = 1 self._set_write_transaction() self._token_from_lock = token_from_lock return token_from_lock def lock_read(self): if self._lock_mode: if self._lock_mode not in ('r', 'w'): raise ValueError("invalid lock mode %r" % (self._lock_mode,)) self._lock_count += 1 else: self._lock.lock_read() #traceback.print_stack() self._lock_mode = 'r' self._lock_count = 1 self._set_read_transaction() def _set_read_transaction(self): """Setup a read transaction.""" self._set_transaction(transactions.ReadOnlyTransaction()) # 5K may be excessive, but hey, its a knob. self.get_transaction().set_cache_size(5000) def _set_write_transaction(self): """Setup a write transaction.""" self._set_transaction(transactions.WriteTransaction()) @only_raises(errors.LockNotHeld, errors.LockBroken) def unlock(self): if not self._lock_mode: return lock.cant_unlock_not_held(self) if self._lock_count > 1: self._lock_count -= 1 else: #traceback.print_stack() self._finish_transaction() try: self._lock.unlock() finally: self._lock_mode = self._lock_count = None def is_locked(self): """Return true if this LockableFiles group is locked""" return self._lock_count >= 1 def get_physical_lock_status(self): """Return physical lock status. Returns true if a lock is held on the transport. 
If no lock is held, or the underlying locking mechanism does not support querying lock status, false is returned. """ try: return self._lock.peek() is not None except NotImplementedError: return False def get_transaction(self): """Return the current active transaction. If no transaction is active, this returns a passthrough object for which all data is immediately flushed and no caching happens. """ if self._transaction is None: return transactions.PassThroughTransaction() else: return self._transaction def _set_transaction(self, new_transaction): """Set a new active transaction.""" if self._transaction is not None: raise errors.LockError('Branch %s is in a transaction already.' % self) self._transaction = new_transaction def _finish_transaction(self): """Exit the current transaction.""" if self._transaction is None: raise errors.LockError('Branch %s is not in a transaction' % self) transaction = self._transaction self._transaction = None transaction.finish() class TransportLock(object): """Locking method which uses transport-dependent locks. On the local filesystem these transform into OS-managed locks. These do not guard against concurrent access via different transports. This is suitable for use only in WorkingTrees (which are at present always local). """ def __init__(self, transport, escaped_name, file_modebits, dir_modebits): self._transport = transport self._escaped_name = escaped_name self._file_modebits = file_modebits self._dir_modebits = dir_modebits def break_lock(self): raise NotImplementedError(self.break_lock) def leave_in_place(self): raise NotImplementedError(self.leave_in_place) def dont_leave_in_place(self): raise NotImplementedError(self.dont_leave_in_place) def lock_write(self, token=None): if token is not None: raise errors.TokenLockingNotSupported(self) self._lock = self._transport.lock_write(self._escaped_name) def lock_read(self): self._lock = self._transport.lock_read(self._escaped_name) def unlock(self): self._lock.unlock() self._lock = None def peek(self): raise NotImplementedError() def create(self, mode=None): """Create lock mechanism""" # for old-style locks, create the file now self._transport.put_bytes(self._escaped_name, '', mode=self._file_modebits) def validate_token(self, token): if token is not None: raise errors.TokenLockingNotSupported(self) bzr-2.7.0/bzrlib/lockdir.py0000644000000000000000000010330112006462334013726 0ustar 00000000000000# Copyright (C) 2006-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """On-disk mutex protecting a resource bzr on-disk objects are locked by the existence of a directory with a particular name within the control directory. We use this rather than OS internal locks (such as flock etc) because they can be seen across all transports, including http. 
Objects can be read if there is only physical read access; therefore readers can never be required to create a lock, though they will check whether a writer is using the lock. Writers can't detect whether anyone else is reading from the resource as they write. This works because of ordering constraints that make sure readers see a consistent view of existing data. Waiting for a lock must be done by polling; this can be aborted after a timeout. Locks must always be explicitly released, typically from a try/finally block -- they are not released from a finalizer or when Python exits. Locks may fail to be released if the process is abruptly terminated (machine stop, SIGKILL) or if a remote transport becomes permanently disconnected. There is therefore a method to break an existing lock. This should rarely be used, and generally only with user approval. Locks contain some information on when the lock was taken and by who which may guide in deciding whether it can safely be broken. (This is similar to the messages displayed by emacs and vim.) Note that if the lock holder is still alive they will get no notification that the lock has been broken and will continue their work -- so it is important to be sure they are actually dead. A lock is represented on disk by a directory of a particular name, containing an information file. Taking a lock is done by renaming a temporary directory into place. We use temporary directories because for all known transports and filesystems we believe that exactly one attempt to claim the lock will succeed and the others will fail. (Files won't do because some filesystems or transports only have rename-and-overwrite, making it hard to tell who won.) The desired characteristics are: * Locks are not reentrant. (That is, a client that tries to take a lock it already holds may deadlock or fail.) * Stale locks can be guessed at by a heuristic * Lost locks can be broken by any client * Failed lock operations leave little or no mess * Deadlocks are avoided by having a timeout always in use, clients desiring indefinite waits can retry or set a silly big timeout. Storage formats use the locks, and also need to consider concurrency issues underneath the lock. A format may choose not to use a lock at all for some operations. LockDirs always operate over a Transport. The transport may be readonly, in which case the lock can be queried but not acquired. Locks are identified by a path name, relative to a base transport. Calling code will typically want to make sure there is exactly one LockDir object per actual lock on disk. This module does nothing to prevent aliasing and deadlocks will likely occur if the locks are aliased. In the future we may add a "freshen" method which can be called by a lock holder to check that their lock has not been broken, and to update the timestamp within it. Example usage: >>> from bzrlib.transport.memory import MemoryTransport >>> # typically will be obtained from a BzrDir, Branch, etc >>> t = MemoryTransport() >>> l = LockDir(t, 'sample-lock') >>> l.create() >>> token = l.wait_lock() >>> # do something here >>> l.unlock() Some classes of stale locks can be predicted by checking: the host name is the same as the local host name; the user name is the same as the local user; the process id no longer exists. The check on user name is not strictly necessary but helps protect against colliding host names. 
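
A further illustrative sketch (not part of the doctest above) of waiting for
a contended lock with a bounded timeout::

    l2 = LockDir(t, 'sample-lock')
    try:
        token = l2.wait_lock(timeout=10, poll=1)
    except LockContention:
        pass  # somebody else still held the lock after ~10 seconds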
""" from __future__ import absolute_import # TODO: We sometimes have the problem that our attempt to rename '1234' to # 'held' fails because the transport server moves into an existing directory, # rather than failing the rename. If we made the info file name the same as # the locked directory name we would avoid this problem because moving into # the held directory would implicitly clash. However this would not mesh with # the existing locking code and needs a new format of the containing object. # -- robertc, mbp 20070628 import os import time from bzrlib import ( config, debug, errors, lock, osutils, ui, urlutils, ) from bzrlib.decorators import only_raises from bzrlib.errors import ( DirectoryNotEmpty, FileExists, LockBreakMismatch, LockBroken, LockContention, LockCorrupt, LockFailed, LockNotHeld, NoSuchFile, PathError, ResourceBusy, TransportError, ) from bzrlib.trace import mutter, note from bzrlib.osutils import format_delta, rand_chars, get_host_name from bzrlib.i18n import gettext from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ from bzrlib import rio """) # XXX: At the moment there is no consideration of thread safety on LockDir # objects. This should perhaps be updated - e.g. if two threads try to take a # lock at the same time they should *both* get it. But then that's unlikely # to be a good idea. # TODO: Perhaps store some kind of note like the bzr command line in the lock # info? # TODO: Some kind of callback run while polling a lock to show progress # indicators. # TODO: Make sure to pass the right file and directory mode bits to all # files/dirs created. _DEFAULT_TIMEOUT_SECONDS = 30 _DEFAULT_POLL_SECONDS = 1.0 class LockDir(lock.Lock): """Write-lock guarding access to data. """ __INFO_NAME = '/info' def __init__(self, transport, path, file_modebits=0644, dir_modebits=0755, extra_holder_info=None): """Create a new LockDir object. The LockDir is initially unlocked - this just creates the object. :param transport: Transport which will contain the lock :param path: Path to the lock within the base directory of the transport. :param extra_holder_info: If passed, {str:str} dict of extra or updated information to insert into the info file when the lock is taken. """ self.transport = transport self.path = path self._lock_held = False self._locked_via_token = False self._fake_read_lock = False self._held_dir = path + '/held' self._held_info_path = self._held_dir + self.__INFO_NAME self._file_modebits = file_modebits self._dir_modebits = dir_modebits self._report_function = note self.extra_holder_info = extra_holder_info self._warned_about_lock_holder = None def __repr__(self): return '%s(%s%s)' % (self.__class__.__name__, self.transport.base, self.path) is_held = property(lambda self: self._lock_held) def create(self, mode=None): """Create the on-disk lock. This is typically only called when the object/directory containing the directory is first created. The lock is not held when it's created. """ self._trace("create lock directory") try: self.transport.mkdir(self.path, mode=mode) except (TransportError, PathError), e: raise LockFailed(self, e) def _attempt_lock(self): """Make the pending directory and attempt to rename into place. If the rename succeeds, we read back the info file to check that we really got the lock. If we fail to acquire the lock, this method is responsible for cleaning up the pending directory if possible. (But it doesn't do that yet.) :returns: The nonce of the lock, if it was successfully acquired. 
:raises LockContention: If the lock is held by someone else. The exception contains the info of the current holder of the lock. """ self._trace("lock_write...") start_time = time.time() try: tmpname = self._create_pending_dir() except (errors.TransportError, PathError), e: self._trace("... failed to create pending dir, %s", e) raise LockFailed(self, e) while True: try: self.transport.rename(tmpname, self._held_dir) break except (errors.TransportError, PathError, DirectoryNotEmpty, FileExists, ResourceBusy), e: self._trace("... contention, %s", e) other_holder = self.peek() self._trace("other holder is %r" % other_holder) try: self._handle_lock_contention(other_holder) except: self._remove_pending_dir(tmpname) raise except Exception, e: self._trace("... lock failed, %s", e) self._remove_pending_dir(tmpname) raise # We must check we really got the lock, because Launchpad's sftp # server at one time had a bug were the rename would successfully # move the new directory into the existing directory, which was # incorrect. It's possible some other servers or filesystems will # have a similar bug allowing someone to think they got the lock # when it's already held. # # See for one case. # # Strictly the check is unnecessary and a waste of time for most # people, but probably worth trapping if something is wrong. info = self.peek() self._trace("after locking, info=%r", info) if info is None: raise LockFailed(self, "lock was renamed into place, but " "now is missing!") if info.get('nonce') != self.nonce: self._trace("rename succeeded, " "but lock is still held by someone else") raise LockContention(self) self._lock_held = True self._trace("... lock succeeded after %dms", (time.time() - start_time) * 1000) return self.nonce def _handle_lock_contention(self, other_holder): """A lock we want to take is held by someone else. This function can: tell the user about it; possibly detect that it's safe or appropriate to steal the lock, or just raise an exception. If this function returns (without raising an exception) the lock will be attempted again. :param other_holder: A LockHeldInfo for the current holder; note that it might be None if the lock can be seen to be held but the info can't be read. """ if (other_holder is not None): if other_holder.is_lock_holder_known_dead(): if self.get_config().get('locks.steal_dead'): ui.ui_factory.show_user_warning( 'locks_steal_dead', lock_url=urlutils.join(self.transport.base, self.path), other_holder_info=unicode(other_holder)) self.force_break(other_holder) self._trace("stole lock from dead holder") return raise LockContention(self) def _remove_pending_dir(self, tmpname): """Remove the pending directory This is called if we failed to rename into place, so that the pending dirs don't clutter up the lockdir. """ self._trace("remove %s", tmpname) try: self.transport.delete(tmpname + self.__INFO_NAME) self.transport.rmdir(tmpname) except PathError, e: note(gettext("error removing pending lock: %s"), e) def _create_pending_dir(self): tmpname = '%s/%s.tmp' % (self.path, rand_chars(10)) try: self.transport.mkdir(tmpname) except NoSuchFile: # This may raise a FileExists exception # which is okay, it will be caught later and determined # to be a LockContention. 
self._trace("lock directory does not exist, creating it") self.create(mode=self._dir_modebits) # After creating the lock directory, try again self.transport.mkdir(tmpname) info = LockHeldInfo.for_this_process(self.extra_holder_info) self.nonce = info.get('nonce') # We use put_file_non_atomic because we just created a new unique # directory so we don't have to worry about files existing there. # We'll rename the whole directory into place to get atomic # properties self.transport.put_bytes_non_atomic(tmpname + self.__INFO_NAME, info.to_bytes()) return tmpname @only_raises(LockNotHeld, LockBroken) def unlock(self): """Release a held lock """ if self._fake_read_lock: self._fake_read_lock = False return if not self._lock_held: return lock.cant_unlock_not_held(self) if self._locked_via_token: self._locked_via_token = False self._lock_held = False else: old_nonce = self.nonce # rename before deleting, because we can't atomically remove the # whole tree start_time = time.time() self._trace("unlocking") tmpname = '%s/releasing.%s.tmp' % (self.path, rand_chars(20)) # gotta own it to unlock self.confirm() self.transport.rename(self._held_dir, tmpname) self._lock_held = False self.transport.delete(tmpname + self.__INFO_NAME) try: self.transport.rmdir(tmpname) except DirectoryNotEmpty, e: # There might have been junk left over by a rename that moved # another locker within the 'held' directory. do a slower # deletion where we list the directory and remove everything # within it. # # Maybe this should be broader to allow for ftp servers with # non-specific error messages? self._trace("doing recursive deletion of non-empty directory " "%s", tmpname) self.transport.delete_tree(tmpname) self._trace("... unlock succeeded after %dms", (time.time() - start_time) * 1000) result = lock.LockResult(self.transport.abspath(self.path), old_nonce) for hook in self.hooks['lock_released']: hook(result) def break_lock(self): """Break a lock not held by this instance of LockDir. This is a UI centric function: it uses the ui.ui_factory to prompt for input if a lock is detected and there is any doubt about it possibly being still active. force_break is the non-interactive version. :returns: LockResult for the broken lock. """ self._check_not_locked() try: holder_info = self.peek() except LockCorrupt, e: # The lock info is corrupt. if ui.ui_factory.get_boolean(u"Break (corrupt %r)" % (self,)): self.force_break_corrupt(e.file_data) return if holder_info is not None: if ui.ui_factory.confirm_action( u"Break %(lock_info)s", 'bzrlib.lockdir.break', dict(lock_info=unicode(holder_info))): result = self.force_break(holder_info) ui.ui_factory.show_message( "Broke lock %s" % result.lock_url) def force_break(self, dead_holder_info): """Release a lock held by another process. WARNING: This should only be used when the other process is dead; if it still thinks it has the lock there will be two concurrent writers. In general the user's approval should be sought for lock breaks. After the lock is broken it will not be held by any process. It is possible that another process may sneak in and take the lock before the breaking process acquires it. :param dead_holder_info: Must be the result of a previous LockDir.peek() call; this is used to check that it's still held by the same process that the user decided was dead. If this is not the current holder, LockBreakMismatch is raised. :returns: LockResult for the broken lock. 
""" if not isinstance(dead_holder_info, LockHeldInfo): raise ValueError("dead_holder_info: %r" % dead_holder_info) self._check_not_locked() current_info = self.peek() if current_info is None: # must have been recently released return if current_info != dead_holder_info: raise LockBreakMismatch(self, current_info, dead_holder_info) tmpname = '%s/broken.%s.tmp' % (self.path, rand_chars(20)) self.transport.rename(self._held_dir, tmpname) # check that we actually broke the right lock, not someone else; # there's a small race window between checking it and doing the # rename. broken_info_path = tmpname + self.__INFO_NAME broken_info = self._read_info_file(broken_info_path) if broken_info != dead_holder_info: raise LockBreakMismatch(self, broken_info, dead_holder_info) self.transport.delete(broken_info_path) self.transport.rmdir(tmpname) result = lock.LockResult(self.transport.abspath(self.path), current_info.get('nonce')) for hook in self.hooks['lock_broken']: hook(result) return result def force_break_corrupt(self, corrupt_info_lines): """Release a lock that has been corrupted. This is very similar to force_break, it except it doesn't assume that self.peek() can work. :param corrupt_info_lines: the lines of the corrupted info file, used to check that the lock hasn't changed between reading the (corrupt) info file and calling force_break_corrupt. """ # XXX: this copes with unparseable info files, but what about missing # info files? Or missing lock dirs? self._check_not_locked() tmpname = '%s/broken.%s.tmp' % (self.path, rand_chars(20)) self.transport.rename(self._held_dir, tmpname) # check that we actually broke the right lock, not someone else; # there's a small race window between checking it and doing the # rename. broken_info_path = tmpname + self.__INFO_NAME broken_content = self.transport.get_bytes(broken_info_path) broken_lines = osutils.split_lines(broken_content) if broken_lines != corrupt_info_lines: raise LockBreakMismatch(self, broken_lines, corrupt_info_lines) self.transport.delete(broken_info_path) self.transport.rmdir(tmpname) result = lock.LockResult(self.transport.abspath(self.path)) for hook in self.hooks['lock_broken']: hook(result) def _check_not_locked(self): """If the lock is held by this instance, raise an error.""" if self._lock_held: raise AssertionError("can't break own lock: %r" % self) def confirm(self): """Make sure that the lock is still held by this locker. This should only fail if the lock was broken by user intervention, or if the lock has been affected by a bug. If the lock is not thought to be held, raises LockNotHeld. If the lock is thought to be held but has been broken, raises LockBroken. """ if not self._lock_held: raise LockNotHeld(self) info = self.peek() if info is None: # no lock there anymore! raise LockBroken(self) if info.get('nonce') != self.nonce: # there is a lock, but not ours raise LockBroken(self) def _read_info_file(self, path): """Read one given info file. peek() reads the info file of the lock holder, if any. """ return LockHeldInfo.from_info_file_bytes( self.transport.get_bytes(path)) def peek(self): """Check if the lock is held by anyone. If it is held, this returns the lock info structure as a dict which contains some information about the current lock holder. Otherwise returns None. """ try: info = self._read_info_file(self._held_info_path) self._trace("peek -> held") return info except NoSuchFile, e: self._trace("peek -> not held") def _prepare_info(self): """Write information about a pending lock to a temporary file. 
""" def attempt_lock(self): """Take the lock; fail if it's already held. If you wish to block until the lock can be obtained, call wait_lock() instead. :return: The lock token. :raises LockContention: if the lock is held by someone else. """ if self._fake_read_lock: raise LockContention(self) result = self._attempt_lock() hook_result = lock.LockResult(self.transport.abspath(self.path), self.nonce) for hook in self.hooks['lock_acquired']: hook(hook_result) return result def lock_url_for_display(self): """Give a nicely-printable representation of the URL of this lock.""" # As local lock urls are correct we display them. # We avoid displaying remote lock urls. lock_url = self.transport.abspath(self.path) if lock_url.startswith('file://'): lock_url = lock_url.split('.bzr/')[0] else: lock_url = '' return lock_url def wait_lock(self, timeout=None, poll=None, max_attempts=None): """Wait a certain period for a lock. If the lock can be acquired within the bounded time, it is taken and this returns. Otherwise, LockContention is raised. Either way, this function should return within approximately `timeout` seconds. (It may be a bit more if a transport operation takes a long time to complete.) :param timeout: Approximate maximum amount of time to wait for the lock, in seconds. :param poll: Delay in seconds between retrying the lock. :param max_attempts: Maximum number of times to try to lock. :return: The lock token. """ if timeout is None: timeout = _DEFAULT_TIMEOUT_SECONDS if poll is None: poll = _DEFAULT_POLL_SECONDS # XXX: the transport interface doesn't let us guard against operations # there taking a long time, so the total elapsed time or poll interval # may be more than was requested. deadline = time.time() + timeout deadline_str = None last_info = None attempt_count = 0 lock_url = self.lock_url_for_display() while True: attempt_count += 1 try: return self.attempt_lock() except LockContention: # possibly report the blockage, then try again pass # TODO: In a few cases, we find out that there's contention by # reading the held info and observing that it's not ours. In # those cases it's a bit redundant to read it again. However, # the normal case (??) is that the rename fails and so we # don't know who holds the lock. For simplicity we peek # always. new_info = self.peek() if new_info is not None and new_info != last_info: if last_info is None: start = gettext('Unable to obtain') else: start = gettext('Lock owner changed for') last_info = new_info msg = gettext('{0} lock {1} {2}.').format(start, lock_url, new_info) if deadline_str is None: deadline_str = time.strftime('%H:%M:%S', time.localtime(deadline)) if timeout > 0: msg += '\n' + gettext( 'Will continue to try until %s, unless ' 'you press Ctrl-C.') % deadline_str msg += '\n' + gettext('See "bzr help break-lock" for more.') self._report_function(msg) if (max_attempts is not None) and (attempt_count >= max_attempts): self._trace("exceeded %d attempts") raise LockContention(self) if time.time() + poll < deadline: self._trace("waiting %ss", poll) time.sleep(poll) else: # As timeout is always 0 for remote locks # this block is applicable only for local # lock contention self._trace("timeout after waiting %ss", timeout) raise LockContention('(local)', lock_url) def leave_in_place(self): self._locked_via_token = True def dont_leave_in_place(self): self._locked_via_token = False def lock_write(self, token=None): """Wait for and acquire the lock. 
:param token: if this is already locked, then lock_write will fail unless the token matches the existing lock. :returns: a token if this instance supports tokens, otherwise None. :raises TokenLockingNotSupported: when a token is given but this instance doesn't support using token locks. :raises MismatchedToken: if the specified token doesn't match the token of the existing lock. A token should be passed in if you know that you have locked the object some other way, and need to synchronise this object's state with that fact. XXX: docstring duplicated from LockableFiles.lock_write. """ if token is not None: self.validate_token(token) self.nonce = token self._lock_held = True self._locked_via_token = True return token else: return self.wait_lock() def lock_read(self): """Compatibility-mode shared lock. LockDir doesn't support shared read-only locks, so this just pretends that the lock is taken but really does nothing. """ # At the moment Branches are commonly locked for read, but # we can't rely on that remotely. Once this is cleaned up, # reenable this warning to prevent it coming back in # -- mbp 20060303 ## warn("LockDir.lock_read falls back to write lock") if self._lock_held or self._fake_read_lock: raise LockContention(self) self._fake_read_lock = True def validate_token(self, token): if token is not None: info = self.peek() if info is None: # Lock isn't held lock_token = None else: lock_token = info.get('nonce') if token != lock_token: raise errors.TokenMismatch(token, lock_token) else: self._trace("revalidated by token %r", token) def _trace(self, format, *args): if 'lock' not in debug.debug_flags: return mutter(str(self) + ": " + (format % args)) def get_config(self): """Get the configuration that governs this lockdir.""" # XXX: This really should also use the locationconfig at least, but # that seems a bit hard to hook up at the moment. -- mbp 20110329 # FIXME: The above is still true ;) -- vila 20110811 return config.GlobalStack() class LockHeldInfo(object): """The information recorded about a held lock. This information is recorded into the lock when it's taken, and it can be read back by any process with access to the lockdir. It can be used, for example, to tell the user who holds the lock, or to try to detect whether the lock holder is still alive. Prior to bzr 2.4 a simple dict was used instead of an object. """ def __init__(self, info_dict): self.info_dict = info_dict def __repr__(self): """Return a debugging representation of this object.""" return "%s(%r)" % (self.__class__.__name__, self.info_dict) def __unicode__(self): """Return a user-oriented description of this object.""" d = self.to_readable_dict() return ( gettext( u'held by %(user)s on %(hostname)s (process #%(pid)s), ' u'acquired %(time_ago)s') % d) def to_readable_dict(self): """Turn the holder info into a dict of human-readable attributes. For example, the start time is presented relative to the current time, rather than as seconds since the epoch. Returns a list of [user, hostname, pid, time_ago] all as readable strings. 
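
        Despite the wording above, the value returned is a dict; illustrative
        output (the values shown are examples only)::

            {'user': 'jrandom@example.com', 'hostname': 'falco',
             'pid': '1234', 'time_ago': '2 minutes ago'}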
""" start_time = self.info_dict.get('start_time') if start_time is None: time_ago = '(unknown)' else: time_ago = format_delta( time.time() - int(self.info_dict['start_time'])) user = self.info_dict.get('user', '') hostname = self.info_dict.get('hostname', '') pid = self.info_dict.get('pid', '') return dict( user=user, hostname=hostname, pid=pid, time_ago=time_ago) def get(self, field_name): """Return the contents of a field from the lock info, or None.""" return self.info_dict.get(field_name) @classmethod def for_this_process(cls, extra_holder_info): """Return a new LockHeldInfo for a lock taken by this process. """ info = dict( hostname=get_host_name(), pid=str(os.getpid()), nonce=rand_chars(20), start_time=str(int(time.time())), user=get_username_for_lock_info(), ) if extra_holder_info is not None: info.update(extra_holder_info) return cls(info) def to_bytes(self): s = rio.Stanza(**self.info_dict) return s.to_string() @classmethod def from_info_file_bytes(cls, info_file_bytes): """Construct from the contents of the held file.""" lines = osutils.split_lines(info_file_bytes) try: stanza = rio.read_stanza(lines) except ValueError, e: mutter('Corrupt lock info file: %r', lines) raise LockCorrupt("could not parse lock info file: " + str(e), lines) if stanza is None: # see bug 185013; we fairly often end up with the info file being # empty after an interruption; we could log a message here but # there may not be much we can say return cls({}) else: return cls(stanza.as_dict()) def __cmp__(self, other): """Value comparison of lock holders.""" return ( cmp(type(self), type(other)) or cmp(self.info_dict, other.info_dict)) def is_locked_by_this_process(self): """True if this process seems to be the current lock holder.""" return ( self.get('hostname') == get_host_name() and self.get('pid') == str(os.getpid()) and self.get('user') == get_username_for_lock_info()) def is_lock_holder_known_dead(self): """True if the lock holder process is known to be dead. False if it's either known to be still alive, or if we just can't tell. We can be fairly sure the lock holder is dead if it declared the same hostname and there is no process with the given pid alive. If people have multiple machines with the same hostname this may cause trouble. This doesn't check whether the lock holder is in fact the same process calling this method. (In that case it will return true.) """ if self.get('hostname') != get_host_name(): return False if self.get('hostname') == 'localhost': # Too ambiguous. return False if self.get('user') != get_username_for_lock_info(): # Could well be another local process by a different user, but # just to be safe we won't conclude about this either. return False pid_str = self.info_dict.get('pid', None) if not pid_str: mutter("no pid recorded in %r" % (self, )) return False try: pid = int(pid_str) except ValueError: mutter("can't parse pid %r from %r" % (pid_str, self)) return False return osutils.is_local_pid_dead(pid) def get_username_for_lock_info(): """Get a username suitable for putting into a lock. It's ok if what's written here is not a proper email address as long as it gives some clue who the user is. 
""" try: return config.GlobalStack().get('email') except errors.NoWhoami: return osutils.getuser_unicode() bzr-2.7.0/bzrlib/log.py0000644000000000000000000024412512201771205013067 0ustar 00000000000000# Copyright (C) 2005-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Code to show logs of changes. Various flavors of log can be produced: * for one file, or the whole tree, and (not done yet) for files in a given directory * in "verbose" mode with a description of what changed from one version to the next * with file-ids and revision-ids shown Logs are actually written out through an abstract LogFormatter interface, which allows for different preferred formats. Plugins can register formats too. Logs can be produced in either forward (oldest->newest) or reverse (newest->oldest) order. Logs can be filtered to show only revisions matching a particular search string, or within a particular range of revisions. The range can be given as date/times, which are reduced to revisions before calling in here. In verbose mode we show a summary of what changed in each particular revision. Note that this is the delta for changes in that revision relative to its left-most parent, not the delta relative to the last logged revision. So for example if you ask for a verbose log of changes touching hello.c you will get a list of those revisions also listing other things that were changed in the same revision, but not all the changes since the previous revision that touched hello.c. """ from __future__ import absolute_import import codecs from cStringIO import StringIO from itertools import ( chain, izip, ) import re import sys from warnings import ( warn, ) from bzrlib.lazy_import import lazy_import lazy_import(globals(), """ from bzrlib import ( config, controldir, diff, errors, foreign, repository as _mod_repository, revision as _mod_revision, revisionspec, tsort, ) from bzrlib.i18n import gettext, ngettext """) from bzrlib import ( lazy_regex, registry, ) from bzrlib.osutils import ( format_date, format_date_with_offset_in_original_timezone, get_diff_header_encoding, get_terminal_encoding, terminal_width, ) def find_touching_revisions(branch, file_id): """Yield a description of revisions which affect the file_id. Each returned element is (revno, revision_id, description) This is the list of revisions where the file is either added, modified, renamed or deleted. TODO: Perhaps some way to limit this to only particular revisions, or to traverse a non-mainline set of revisions? 
""" last_ie = None last_path = None revno = 1 graph = branch.repository.get_graph() history = list(graph.iter_lefthand_ancestry(branch.last_revision(), [_mod_revision.NULL_REVISION])) for revision_id in reversed(history): this_inv = branch.repository.get_inventory(revision_id) if this_inv.has_id(file_id): this_ie = this_inv[file_id] this_path = this_inv.id2path(file_id) else: this_ie = this_path = None # now we know how it was last time, and how it is in this revision. # are those two states effectively the same or not? if not this_ie and not last_ie: # not present in either pass elif this_ie and not last_ie: yield revno, revision_id, "added " + this_path elif not this_ie and last_ie: # deleted here yield revno, revision_id, "deleted " + last_path elif this_path != last_path: yield revno, revision_id, ("renamed %s => %s" % (last_path, this_path)) elif (this_ie.text_size != last_ie.text_size or this_ie.text_sha1 != last_ie.text_sha1): yield revno, revision_id, "modified " + this_path last_ie = this_ie last_path = this_path revno += 1 def show_log(branch, lf, specific_fileid=None, verbose=False, direction='reverse', start_revision=None, end_revision=None, search=None, limit=None, show_diff=False, match=None): """Write out human-readable log of commits to this branch. This function is being retained for backwards compatibility but should not be extended with new parameters. Use the new Logger class instead, eg. Logger(branch, rqst).show(lf), adding parameters to the make_log_request_dict function. :param lf: The LogFormatter object showing the output. :param specific_fileid: If not None, list only the commits affecting the specified file, rather than all commits. :param verbose: If True show added/changed/deleted/renamed files. :param direction: 'reverse' (default) is latest to earliest; 'forward' is earliest to latest. :param start_revision: If not None, only show revisions >= start_revision :param end_revision: If not None, only show revisions <= end_revision :param search: If not None, only show revisions with matching commit messages :param limit: If set, shows only 'limit' revisions, all revisions are shown if None or 0. :param show_diff: If True, output a diff after each revision. :param match: Dictionary of search lists to use when matching revision properties. """ # Convert old-style parameters to new-style parameters if specific_fileid is not None: file_ids = [specific_fileid] else: file_ids = None if verbose: if file_ids: delta_type = 'partial' else: delta_type = 'full' else: delta_type = None if show_diff: if file_ids: diff_type = 'partial' else: diff_type = 'full' else: diff_type = None # Build the request and execute it rqst = make_log_request_dict(direction=direction, specific_fileids=file_ids, start_revision=start_revision, end_revision=end_revision, limit=limit, message_search=search, delta_type=delta_type, diff_type=diff_type) Logger(branch, rqst).show(lf) # Note: This needs to be kept in sync with the defaults in # make_log_request_dict() below _DEFAULT_REQUEST_PARAMS = { 'direction': 'reverse', 'levels': None, 'generate_tags': True, 'exclude_common_ancestry': False, '_match_using_deltas': True, } def make_log_request_dict(direction='reverse', specific_fileids=None, start_revision=None, end_revision=None, limit=None, message_search=None, levels=None, generate_tags=True, delta_type=None, diff_type=None, _match_using_deltas=True, exclude_common_ancestry=False, match=None, signature=False, omit_merges=False, ): """Convenience function for making a logging request dictionary. 
Using this function may make code slightly safer by ensuring parameters have the correct names. It also provides a reference point for documenting the supported parameters. :param direction: 'reverse' (default) is latest to earliest; 'forward' is earliest to latest. :param specific_fileids: If not None, only include revisions affecting the specified files, rather than all revisions. :param start_revision: If not None, only generate revisions >= start_revision :param end_revision: If not None, only generate revisions <= end_revision :param limit: If set, generate only 'limit' revisions, all revisions are shown if None or 0. :param message_search: If not None, only include revisions with matching commit messages :param levels: the number of levels of revisions to generate; 1 for just the mainline; 0 for all levels, or None for a sensible default. :param generate_tags: If True, include tags for matched revisions. ` :param delta_type: Either 'full', 'partial' or None. 'full' means generate the complete delta - adds/deletes/modifies/etc; 'partial' means filter the delta using specific_fileids; None means do not generate any delta. :param diff_type: Either 'full', 'partial' or None. 'full' means generate the complete diff - adds/deletes/modifies/etc; 'partial' means filter the diff using specific_fileids; None means do not generate any diff. :param _match_using_deltas: a private parameter controlling the algorithm used for matching specific_fileids. This parameter may be removed in the future so bzrlib client code should NOT use it. :param exclude_common_ancestry: Whether -rX..Y should be interpreted as a range operator or as a graph difference. :param signature: show digital signature information :param match: Dictionary of list of search strings to use when filtering revisions. Keys can be 'message', 'author', 'committer', 'bugs' or the empty string to match any of the preceding properties. :param omit_merges: If True, commits with more than one parent are omitted. """ # Take care of old style message_search parameter if message_search: if match: if 'message' in match: match['message'].append(message_search) else: match['message'] = [message_search] else: match={ 'message': [message_search] } return { 'direction': direction, 'specific_fileids': specific_fileids, 'start_revision': start_revision, 'end_revision': end_revision, 'limit': limit, 'levels': levels, 'generate_tags': generate_tags, 'delta_type': delta_type, 'diff_type': diff_type, 'exclude_common_ancestry': exclude_common_ancestry, 'signature': signature, 'match': match, 'omit_merges': omit_merges, # Add 'private' attributes for features that may be deprecated '_match_using_deltas': _match_using_deltas, } def _apply_log_request_defaults(rqst): """Apply default values to a request dictionary.""" result = _DEFAULT_REQUEST_PARAMS.copy() if rqst: result.update(rqst) return result def format_signature_validity(rev_id, repo): """get the signature validity :param rev_id: revision id to validate :param repo: repository of revision :return: human readable string to print to log """ from bzrlib import gpg gpg_strategy = gpg.GPGStrategy(None) result = repo.verify_revision_signature(rev_id, gpg_strategy) if result[0] == gpg.SIGNATURE_VALID: return u"valid signature from {0}".format(result[1]) if result[0] == gpg.SIGNATURE_KEY_MISSING: return "unknown key {0}".format(result[1]) if result[0] == gpg.SIGNATURE_NOT_VALID: return "invalid signature!" 
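As the show_log() docstring above recommends, new code should build a request dictionary and hand it to Logger. A sketch of that flow, wrapping stdout the same way show_changed_revisions() later in this module does:

import codecs
import sys

from bzrlib import log
from bzrlib.branch import Branch
from bzrlib.osutils import get_terminal_encoding

branch = Branch.open('.')
to_file = codecs.getwriter(get_terminal_encoding())(sys.stdout, errors='replace')
rqst = log.make_log_request_dict(limit=10, levels=1)
lf = log.log_formatter('short', to_file=to_file, show_timezone='original')
log.Logger(branch, rqst).show(lf)        # show() takes and releases the read lock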
if result[0] == gpg.SIGNATURE_NOT_SIGNED: return "no signature" class LogGenerator(object): """A generator of log revisions.""" def iter_log_revisions(self): """Iterate over LogRevision objects. :return: An iterator yielding LogRevision objects. """ raise NotImplementedError(self.iter_log_revisions) class Logger(object): """An object that generates, formats and displays a log.""" def __init__(self, branch, rqst): """Create a Logger. :param branch: the branch to log :param rqst: A dictionary specifying the query parameters. See make_log_request_dict() for supported values. """ self.branch = branch self.rqst = _apply_log_request_defaults(rqst) def show(self, lf): """Display the log. :param lf: The LogFormatter object to send the output to. """ if not isinstance(lf, LogFormatter): warn("not a LogFormatter instance: %r" % lf) self.branch.lock_read() try: if getattr(lf, 'begin_log', None): lf.begin_log() self._show_body(lf) if getattr(lf, 'end_log', None): lf.end_log() finally: self.branch.unlock() def _show_body(self, lf): """Show the main log output. Subclasses may wish to override this. """ # Tweak the LogRequest based on what the LogFormatter can handle. # (There's no point generating stuff if the formatter can't display it.) rqst = self.rqst if rqst['levels'] is None or lf.get_levels() > rqst['levels']: # user didn't specify levels, use whatever the LF can handle: rqst['levels'] = lf.get_levels() if not getattr(lf, 'supports_tags', False): rqst['generate_tags'] = False if not getattr(lf, 'supports_delta', False): rqst['delta_type'] = None if not getattr(lf, 'supports_diff', False): rqst['diff_type'] = None if not getattr(lf, 'supports_signatures', False): rqst['signature'] = False # Find and print the interesting revisions generator = self._generator_factory(self.branch, rqst) for lr in generator.iter_log_revisions(): lf.log_revision(lr) lf.show_advice() def _generator_factory(self, branch, rqst): """Make the LogGenerator object to use. Subclasses may wish to override this. """ return _DefaultLogGenerator(branch, rqst) class _StartNotLinearAncestor(Exception): """Raised when a start revision is not found walking left-hand history.""" class _DefaultLogGenerator(LogGenerator): """The default generator of log revisions.""" def __init__(self, branch, rqst): self.branch = branch self.rqst = rqst if rqst.get('generate_tags') and branch.supports_tags(): self.rev_tag_dict = branch.tags.get_reverse_tag_dict() else: self.rev_tag_dict = {} def iter_log_revisions(self): """Iterate over LogRevision objects. :return: An iterator yielding LogRevision objects. 
""" rqst = self.rqst levels = rqst.get('levels') limit = rqst.get('limit') diff_type = rqst.get('diff_type') show_signature = rqst.get('signature') omit_merges = rqst.get('omit_merges') log_count = 0 revision_iterator = self._create_log_revision_iterator() for revs in revision_iterator: for (rev_id, revno, merge_depth), rev, delta in revs: # 0 levels means show everything; merge_depth counts from 0 if levels != 0 and merge_depth >= levels: continue if omit_merges and len(rev.parent_ids) > 1: continue if diff_type is None: diff = None else: diff = self._format_diff(rev, rev_id, diff_type) if show_signature: signature = format_signature_validity(rev_id, self.branch.repository) else: signature = None yield LogRevision(rev, revno, merge_depth, delta, self.rev_tag_dict.get(rev_id), diff, signature) if limit: log_count += 1 if log_count >= limit: return def _format_diff(self, rev, rev_id, diff_type): repo = self.branch.repository if len(rev.parent_ids) == 0: ancestor_id = _mod_revision.NULL_REVISION else: ancestor_id = rev.parent_ids[0] tree_1 = repo.revision_tree(ancestor_id) tree_2 = repo.revision_tree(rev_id) file_ids = self.rqst.get('specific_fileids') if diff_type == 'partial' and file_ids is not None: specific_files = [tree_2.id2path(id) for id in file_ids] else: specific_files = None s = StringIO() path_encoding = get_diff_header_encoding() diff.show_diff_trees(tree_1, tree_2, s, specific_files, old_label='', new_label='', path_encoding=path_encoding) return s.getvalue() def _create_log_revision_iterator(self): """Create a revision iterator for log. :return: An iterator over lists of ((rev_id, revno, merge_depth), rev, delta). """ self.start_rev_id, self.end_rev_id = _get_revision_limits( self.branch, self.rqst.get('start_revision'), self.rqst.get('end_revision')) if self.rqst.get('_match_using_deltas'): return self._log_revision_iterator_using_delta_matching() else: # We're using the per-file-graph algorithm. This scales really # well but only makes sense if there is a single file and it's # not a directory file_count = len(self.rqst.get('specific_fileids')) if file_count != 1: raise BzrError("illegal LogRequest: must match-using-deltas " "when logging %d files" % file_count) return self._log_revision_iterator_using_per_file_graph() def _log_revision_iterator_using_delta_matching(self): # Get the base revisions, filtering by the revision range rqst = self.rqst generate_merge_revisions = rqst.get('levels') != 1 delayed_graph_generation = not rqst.get('specific_fileids') and ( rqst.get('limit') or self.start_rev_id or self.end_rev_id) view_revisions = _calc_view_revisions( self.branch, self.start_rev_id, self.end_rev_id, rqst.get('direction'), generate_merge_revisions=generate_merge_revisions, delayed_graph_generation=delayed_graph_generation, exclude_common_ancestry=rqst.get('exclude_common_ancestry')) # Apply the other filters return make_log_rev_iterator(self.branch, view_revisions, rqst.get('delta_type'), rqst.get('match'), file_ids=rqst.get('specific_fileids'), direction=rqst.get('direction')) def _log_revision_iterator_using_per_file_graph(self): # Get the base revisions, filtering by the revision range. # Note that we always generate the merge revisions because # filter_revisions_touching_file_id() requires them ... 
rqst = self.rqst view_revisions = _calc_view_revisions( self.branch, self.start_rev_id, self.end_rev_id, rqst.get('direction'), generate_merge_revisions=True, exclude_common_ancestry=rqst.get('exclude_common_ancestry')) if not isinstance(view_revisions, list): view_revisions = list(view_revisions) view_revisions = _filter_revisions_touching_file_id(self.branch, rqst.get('specific_fileids')[0], view_revisions, include_merges=rqst.get('levels') != 1) return make_log_rev_iterator(self.branch, view_revisions, rqst.get('delta_type'), rqst.get('match')) def _calc_view_revisions(branch, start_rev_id, end_rev_id, direction, generate_merge_revisions, delayed_graph_generation=False, exclude_common_ancestry=False, ): """Calculate the revisions to view. :return: An iterator of (revision_id, dotted_revno, merge_depth) tuples OR a list of the same tuples. """ if (exclude_common_ancestry and start_rev_id == end_rev_id): raise errors.BzrCommandError(gettext( '--exclude-common-ancestry requires two different revisions')) if direction not in ('reverse', 'forward'): raise ValueError(gettext('invalid direction %r') % direction) br_revno, br_rev_id = branch.last_revision_info() if br_revno == 0: return [] if (end_rev_id and start_rev_id == end_rev_id and (not generate_merge_revisions or not _has_merges(branch, end_rev_id))): # If a single revision is requested, check we can handle it return _generate_one_revision(branch, end_rev_id, br_rev_id, br_revno) if not generate_merge_revisions: try: # If we only want to see linear revisions, we can iterate ... iter_revs = _linear_view_revisions( branch, start_rev_id, end_rev_id, exclude_common_ancestry=exclude_common_ancestry) # If a start limit was given and it's not obviously an # ancestor of the end limit, check it before outputting anything if (direction == 'forward' or (start_rev_id and not _is_obvious_ancestor( branch, start_rev_id, end_rev_id))): iter_revs = list(iter_revs) if direction == 'forward': iter_revs = reversed(iter_revs) return iter_revs except _StartNotLinearAncestor: # Switch to the slower implementation that may be able to find a # non-obvious ancestor out of the left-hand history. pass iter_revs = _generate_all_revisions(branch, start_rev_id, end_rev_id, direction, delayed_graph_generation, exclude_common_ancestry) if direction == 'forward': iter_revs = _rebase_merge_depth(reverse_by_depth(list(iter_revs))) return iter_revs def _generate_one_revision(branch, rev_id, br_rev_id, br_revno): if rev_id == br_rev_id: # It's the tip return [(br_rev_id, br_revno, 0)] else: revno_str = _compute_revno_str(branch, rev_id) return [(rev_id, revno_str, 0)] def _generate_all_revisions(branch, start_rev_id, end_rev_id, direction, delayed_graph_generation, exclude_common_ancestry=False): # On large trees, generating the merge graph can take 30-60 seconds # so we delay doing it until a merge is detected, incrementally # returning initial (non-merge) revisions while we can. # The above is only true for old formats (<= 0.92), for newer formats, a # couple of seconds only should be needed to load the whole graph and the # other graph operations needed are even faster than that -- vila 100201 initial_revisions = [] if delayed_graph_generation: try: for rev_id, revno, depth in _linear_view_revisions( branch, start_rev_id, end_rev_id, exclude_common_ancestry): if _has_merges(branch, rev_id): # The end_rev_id can be nested down somewhere. We need an # explicit ancestry check. 
There is an ambiguity here as we # may not raise _StartNotLinearAncestor for a revision that # is an ancestor but not a *linear* one. But since we have # loaded the graph to do the check (or calculate a dotted # revno), we may as well accept to show the log... We need # the check only if start_rev_id is not None as all # revisions have _mod_revision.NULL_REVISION as an ancestor # -- vila 20100319 graph = branch.repository.get_graph() if (start_rev_id is not None and not graph.is_ancestor(start_rev_id, end_rev_id)): raise _StartNotLinearAncestor() # Since we collected the revisions so far, we need to # adjust end_rev_id. end_rev_id = rev_id break else: initial_revisions.append((rev_id, revno, depth)) else: # No merged revisions found return initial_revisions except _StartNotLinearAncestor: # A merge was never detected so the lower revision limit can't # be nested down somewhere raise errors.BzrCommandError(gettext('Start revision not found in' ' history of end revision.')) # We exit the loop above because we encounter a revision with merges, from # this revision, we need to switch to _graph_view_revisions. # A log including nested merges is required. If the direction is reverse, # we rebase the initial merge depths so that the development line is # shown naturally, i.e. just like it is for linear logging. We can easily # make forward the exact opposite display, but showing the merge revisions # indented at the end seems slightly nicer in that case. view_revisions = chain(iter(initial_revisions), _graph_view_revisions(branch, start_rev_id, end_rev_id, rebase_initial_depths=(direction == 'reverse'), exclude_common_ancestry=exclude_common_ancestry)) return view_revisions def _has_merges(branch, rev_id): """Does a revision have multiple parents or not?""" parents = branch.repository.get_parent_map([rev_id]).get(rev_id, []) return len(parents) > 1 def _compute_revno_str(branch, rev_id): """Compute the revno string from a rev_id. :return: The revno string, or None if the revision is not in the supplied branch. """ try: revno = branch.revision_id_to_dotted_revno(rev_id) except errors.NoSuchRevision: # The revision must be outside of this branch return None else: return '.'.join(str(n) for n in revno) def _is_obvious_ancestor(branch, start_rev_id, end_rev_id): """Is start_rev_id an obvious ancestor of end_rev_id?""" if start_rev_id and end_rev_id: try: start_dotted = branch.revision_id_to_dotted_revno(start_rev_id) end_dotted = branch.revision_id_to_dotted_revno(end_rev_id) except errors.NoSuchRevision: # one or both is not in the branch; not obvious return False if len(start_dotted) == 1 and len(end_dotted) == 1: # both on mainline return start_dotted[0] <= end_dotted[0] elif (len(start_dotted) == 3 and len(end_dotted) == 3 and start_dotted[0:1] == end_dotted[0:1]): # both on same development line return start_dotted[2] <= end_dotted[2] else: # not obvious return False # if either start or end is not specified then we use either the first or # the last revision and *they* are obvious ancestors. return True def _linear_view_revisions(branch, start_rev_id, end_rev_id, exclude_common_ancestry=False): """Calculate a sequence of revisions to view, newest to oldest. :param start_rev_id: the lower revision-id :param end_rev_id: the upper revision-id :param exclude_common_ancestry: Whether the start_rev_id should be part of the iterated revisions. :return: An iterator of (revision_id, dotted_revno, merge_depth) tuples. 
:raises _StartNotLinearAncestor: if a start_rev_id is specified but is not found walking the left-hand history """ br_revno, br_rev_id = branch.last_revision_info() repo = branch.repository graph = repo.get_graph() if start_rev_id is None and end_rev_id is None: cur_revno = br_revno for revision_id in graph.iter_lefthand_ancestry(br_rev_id, (_mod_revision.NULL_REVISION,)): yield revision_id, str(cur_revno), 0 cur_revno -= 1 else: if end_rev_id is None: end_rev_id = br_rev_id found_start = start_rev_id is None for revision_id in graph.iter_lefthand_ancestry(end_rev_id, (_mod_revision.NULL_REVISION,)): revno_str = _compute_revno_str(branch, revision_id) if not found_start and revision_id == start_rev_id: if not exclude_common_ancestry: yield revision_id, revno_str, 0 found_start = True break else: yield revision_id, revno_str, 0 else: if not found_start: raise _StartNotLinearAncestor() def _graph_view_revisions(branch, start_rev_id, end_rev_id, rebase_initial_depths=True, exclude_common_ancestry=False): """Calculate revisions to view including merges, newest to oldest. :param branch: the branch :param start_rev_id: the lower revision-id :param end_rev_id: the upper revision-id :param rebase_initial_depth: should depths be rebased until a mainline revision is found? :return: An iterator of (revision_id, dotted_revno, merge_depth) tuples. """ if exclude_common_ancestry: stop_rule = 'with-merges-without-common-ancestry' else: stop_rule = 'with-merges' view_revisions = branch.iter_merge_sorted_revisions( start_revision_id=end_rev_id, stop_revision_id=start_rev_id, stop_rule=stop_rule) if not rebase_initial_depths: for (rev_id, merge_depth, revno, end_of_merge ) in view_revisions: yield rev_id, '.'.join(map(str, revno)), merge_depth else: # We're following a development line starting at a merged revision. # We need to adjust depths down by the initial depth until we find # a depth less than it. Then we use that depth as the adjustment. # If and when we reach the mainline, depth adjustment ends. depth_adjustment = None for (rev_id, merge_depth, revno, end_of_merge ) in view_revisions: if depth_adjustment is None: depth_adjustment = merge_depth if depth_adjustment: if merge_depth < depth_adjustment: # From now on we reduce the depth adjustement, this can be # surprising for users. The alternative requires two passes # which breaks the fast display of the first revision # though. depth_adjustment = merge_depth merge_depth -= depth_adjustment yield rev_id, '.'.join(map(str, revno)), merge_depth def _rebase_merge_depth(view_revisions): """Adjust depths upwards so the top level is 0.""" # If either the first or last revision have a merge_depth of 0, we're done if view_revisions and view_revisions[0][2] and view_revisions[-1][2]: min_depth = min([d for r,n,d in view_revisions]) if min_depth != 0: view_revisions = [(r,n,d-min_depth) for r,n,d in view_revisions] return view_revisions def make_log_rev_iterator(branch, view_revisions, generate_delta, search, file_ids=None, direction='reverse'): """Create a revision iterator for log. :param branch: The branch being logged. :param view_revisions: The revisions being viewed. :param generate_delta: Whether to generate a delta for each revision. Permitted values are None, 'full' and 'partial'. :param search: A user text search string. :param file_ids: If non empty, only revisions matching one or more of the file-ids are to be kept. 
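_graph_view_revisions() above is essentially a thin adapter over the public Branch API; the same merge-sorted walk can be done directly. A sketch:

from bzrlib.branch import Branch

branch = Branch.open('.')
branch.lock_read()
try:
    for rev_id, depth, revno, end_of_merge in branch.iter_merge_sorted_revisions(
            stop_rule='with-merges'):
        print '%s%s %s' % ('  ' * depth, '.'.join(map(str, revno)), rev_id)
finally:
    branch.unlock()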
:param direction: the direction in which view_revisions is sorted :return: An iterator over lists of ((rev_id, revno, merge_depth), rev, delta). """ # Convert view_revisions into (view, None, None) groups to fit with # the standard interface here. if type(view_revisions) == list: # A single batch conversion is faster than many incremental ones. # As we have all the data, do a batch conversion. nones = [None] * len(view_revisions) log_rev_iterator = iter([zip(view_revisions, nones, nones)]) else: def _convert(): for view in view_revisions: yield (view, None, None) log_rev_iterator = iter([_convert()]) for adapter in log_adapters: # It would be nicer if log adapters were first class objects # with custom parameters. This will do for now. IGC 20090127 if adapter == _make_delta_filter: log_rev_iterator = adapter(branch, generate_delta, search, log_rev_iterator, file_ids, direction) else: log_rev_iterator = adapter(branch, generate_delta, search, log_rev_iterator) return log_rev_iterator def _make_search_filter(branch, generate_delta, match, log_rev_iterator): """Create a filtered iterator of log_rev_iterator matching on a regex. :param branch: The branch being logged. :param generate_delta: Whether to generate a delta for each revision. :param match: A dictionary with properties as keys and lists of strings as values. To match, a revision may match any of the supplied strings within a single property but must match at least one string for each property. :param log_rev_iterator: An input iterator containing all revisions that could be displayed, in lists. :return: An iterator over lists of ((rev_id, revno, merge_depth), rev, delta). """ if match is None: return log_rev_iterator searchRE = [(k, [re.compile(x, re.IGNORECASE) for x in v]) for (k,v) in match.iteritems()] return _filter_re(searchRE, log_rev_iterator) def _filter_re(searchRE, log_rev_iterator): for revs in log_rev_iterator: new_revs = [rev for rev in revs if _match_filter(searchRE, rev[1])] if new_revs: yield new_revs def _match_filter(searchRE, rev): strings = { 'message': (rev.message,), 'committer': (rev.committer,), 'author': (rev.get_apparent_authors()), 'bugs': list(rev.iter_bugs()) } strings[''] = [item for inner_list in strings.itervalues() for item in inner_list] for (k,v) in searchRE: if k in strings and not _match_any_filter(strings[k], v): return False return True def _match_any_filter(strings, res): return any([filter(None, map(re.search, strings)) for re in res]) def _make_delta_filter(branch, generate_delta, search, log_rev_iterator, fileids=None, direction='reverse'): """Add revision deltas to a log iterator if needed. :param branch: The branch being logged. :param generate_delta: Whether to generate a delta for each revision. Permitted values are None, 'full' and 'partial'. :param search: A user text search string. :param log_rev_iterator: An input iterator containing all revisions that could be displayed, in lists. :param fileids: If non empty, only revisions matching one or more of the file-ids are to be kept. :param direction: the direction in which view_revisions is sorted :return: An iterator over lists of ((rev_id, revno, merge_depth), rev, delta). """ if not generate_delta and not fileids: return log_rev_iterator return _generate_deltas(branch.repository, log_rev_iterator, generate_delta, fileids, direction) def _generate_deltas(repository, log_rev_iterator, delta_type, fileids, direction): """Create deltas for each batch of revisions in log_rev_iterator. 
If we're only generating deltas for the sake of filtering against file-ids, we stop generating deltas once all file-ids reach the appropriate life-cycle point. If we're receiving data newest to oldest, then that life-cycle point is 'add', otherwise it's 'remove'. """ check_fileids = fileids is not None and len(fileids) > 0 if check_fileids: fileid_set = set(fileids) if direction == 'reverse': stop_on = 'add' else: stop_on = 'remove' else: fileid_set = None for revs in log_rev_iterator: # If we were matching against fileids and we've run out, # there's nothing left to do if check_fileids and not fileid_set: return revisions = [rev[1] for rev in revs] new_revs = [] if delta_type == 'full' and not check_fileids: deltas = repository.get_deltas_for_revisions(revisions) for rev, delta in izip(revs, deltas): new_revs.append((rev[0], rev[1], delta)) else: deltas = repository.get_deltas_for_revisions(revisions, fileid_set) for rev, delta in izip(revs, deltas): if check_fileids: if delta is None or not delta.has_changed(): continue else: _update_fileids(delta, fileid_set, stop_on) if delta_type is None: delta = None elif delta_type == 'full': # If the file matches all the time, rebuilding # a full delta like this in addition to a partial # one could be slow. However, it's likely that # most revisions won't get this far, making it # faster to filter on the partial deltas and # build the occasional full delta than always # building full deltas and filtering those. rev_id = rev[0][0] delta = repository.get_revision_delta(rev_id) new_revs.append((rev[0], rev[1], delta)) yield new_revs def _update_fileids(delta, fileids, stop_on): """Update the set of file-ids to search based on file lifecycle events. :param fileids: a set of fileids to update :param stop_on: either 'add' or 'remove' - take file-ids out of the fileids set once their add or remove entry is detected respectively """ if stop_on == 'add': for item in delta.added: if item[1] in fileids: fileids.remove(item[1]) elif stop_on == 'delete': for item in delta.removed: if item[1] in fileids: fileids.remove(item[1]) def _make_revision_objects(branch, generate_delta, search, log_rev_iterator): """Extract revision objects from the repository :param branch: The branch being logged. :param generate_delta: Whether to generate a delta for each revision. :param search: A user text search string. :param log_rev_iterator: An input iterator containing all revisions that could be displayed, in lists. :return: An iterator over lists of ((rev_id, revno, merge_depth), rev, delta). """ repository = branch.repository for revs in log_rev_iterator: # r = revision_id, n = revno, d = merge depth revision_ids = [view[0] for view, _, _ in revs] revisions = repository.get_revisions(revision_ids) revs = [(rev[0], revision, rev[2]) for rev, revision in izip(revs, revisions)] yield revs def _make_batch_filter(branch, generate_delta, search, log_rev_iterator): """Group up a single large batch into smaller ones. :param branch: The branch being logged. :param generate_delta: Whether to generate a delta for each revision. :param search: A user text search string. :param log_rev_iterator: An input iterator containing all revisions that could be displayed, in lists. :return: An iterator over lists of ((rev_id, revno, merge_depth), rev, delta). 
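The point of _update_fileids() above is that, when walking newest-to-oldest, a file-id can be dropped from the search set once the revision that added it has been seen, and delta generation can stop entirely once the set is empty. A standalone sketch of that idea (names are illustrative, not bzrlib API):

def prune_on_add(added_entries, wanted_file_ids):
    # added_entries: (path, file_id, ...) tuples from a delta's 'added' list;
    # nothing older than the adding revision can touch these ids.
    for entry in added_entries:
        wanted_file_ids.discard(entry[1])

wanted = set(['file-id-1', 'file-id-2'])
prune_on_add([('README', 'file-id-1')], wanted)
print wanted          # set(['file-id-2']); stop making deltas once this is empty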
""" num = 9 for batch in log_rev_iterator: batch = iter(batch) while True: step = [detail for _, detail in zip(range(num), batch)] if len(step) == 0: break yield step num = min(int(num * 1.5), 200) def _get_revision_limits(branch, start_revision, end_revision): """Get and check revision limits. :param branch: The branch containing the revisions. :param start_revision: The first revision to be logged. For backwards compatibility this may be a mainline integer revno, but for merge revision support a RevisionInfo is expected. :param end_revision: The last revision to be logged. For backwards compatibility this may be a mainline integer revno, but for merge revision support a RevisionInfo is expected. :return: (start_rev_id, end_rev_id) tuple. """ branch_revno, branch_rev_id = branch.last_revision_info() start_rev_id = None if start_revision is None: start_revno = 1 else: if isinstance(start_revision, revisionspec.RevisionInfo): start_rev_id = start_revision.rev_id start_revno = start_revision.revno or 1 else: branch.check_real_revno(start_revision) start_revno = start_revision start_rev_id = branch.get_rev_id(start_revno) end_rev_id = None if end_revision is None: end_revno = branch_revno else: if isinstance(end_revision, revisionspec.RevisionInfo): end_rev_id = end_revision.rev_id end_revno = end_revision.revno or branch_revno else: branch.check_real_revno(end_revision) end_revno = end_revision end_rev_id = branch.get_rev_id(end_revno) if branch_revno != 0: if (start_rev_id == _mod_revision.NULL_REVISION or end_rev_id == _mod_revision.NULL_REVISION): raise errors.BzrCommandError(gettext('Logging revision 0 is invalid.')) if start_revno > end_revno: raise errors.BzrCommandError(gettext("Start revision must be " "older than the end revision.")) return (start_rev_id, end_rev_id) def _get_mainline_revs(branch, start_revision, end_revision): """Get the mainline revisions from the branch. Generates the list of mainline revisions for the branch. :param branch: The branch containing the revisions. :param start_revision: The first revision to be logged. For backwards compatibility this may be a mainline integer revno, but for merge revision support a RevisionInfo is expected. :param end_revision: The last revision to be logged. For backwards compatibility this may be a mainline integer revno, but for merge revision support a RevisionInfo is expected. :return: A (mainline_revs, rev_nos, start_rev_id, end_rev_id) tuple. """ branch_revno, branch_last_revision = branch.last_revision_info() if branch_revno == 0: return None, None, None, None # For mainline generation, map start_revision and end_revision to # mainline revnos. If the revision is not on the mainline choose the # appropriate extreme of the mainline instead - the extra will be # filtered later. # Also map the revisions to rev_ids, to be used in the later filtering # stage. 
start_rev_id = None if start_revision is None: start_revno = 1 else: if isinstance(start_revision, revisionspec.RevisionInfo): start_rev_id = start_revision.rev_id start_revno = start_revision.revno or 1 else: branch.check_real_revno(start_revision) start_revno = start_revision end_rev_id = None if end_revision is None: end_revno = branch_revno else: if isinstance(end_revision, revisionspec.RevisionInfo): end_rev_id = end_revision.rev_id end_revno = end_revision.revno or branch_revno else: branch.check_real_revno(end_revision) end_revno = end_revision if ((start_rev_id == _mod_revision.NULL_REVISION) or (end_rev_id == _mod_revision.NULL_REVISION)): raise errors.BzrCommandError(gettext('Logging revision 0 is invalid.')) if start_revno > end_revno: raise errors.BzrCommandError(gettext("Start revision must be older " "than the end revision.")) if end_revno < start_revno: return None, None, None, None cur_revno = branch_revno rev_nos = {} mainline_revs = [] graph = branch.repository.get_graph() for revision_id in graph.iter_lefthand_ancestry( branch_last_revision, (_mod_revision.NULL_REVISION,)): if cur_revno < start_revno: # We have gone far enough, but we always add 1 more revision rev_nos[revision_id] = cur_revno mainline_revs.append(revision_id) break if cur_revno <= end_revno: rev_nos[revision_id] = cur_revno mainline_revs.append(revision_id) cur_revno -= 1 else: # We walked off the edge of all revisions, so we add a 'None' marker mainline_revs.append(None) mainline_revs.reverse() # override the mainline to look like the revision history. return mainline_revs, rev_nos, start_rev_id, end_rev_id def _filter_revisions_touching_file_id(branch, file_id, view_revisions, include_merges=True): r"""Return the list of revision ids which touch a given file id. The function filters view_revisions and returns a subset. This includes the revisions which directly change the file id, and the revisions which merge these changes. So if the revision graph is:: A-. |\ \ B C E |/ / D | |\| | F |/ G And 'C' changes a file, then both C and D will be returned. F will not be returned even though it brings the changes to C into the branch starting with E. (Note that if we were using F as the tip instead of G, then we would see C, D, F.) This will also be restricted based on a subset of the mainline. :param branch: The branch where we can get text revision information. :param file_id: Filter out revisions that do not touch file_id. :param view_revisions: A list of (revision_id, dotted_revno, merge_depth) tuples. This is the list of revisions which will be filtered. It is assumed that view_revisions is in merge_sort order (i.e. newest revision first ). :param include_merges: include merge revisions in the result or not :return: A list of (revision_id, dotted_revno, merge_depth) tuples. """ # Lookup all possible text keys to determine which ones actually modified # the file. graph = branch.repository.get_file_graph() get_parent_map = graph.get_parent_map text_keys = [(file_id, rev_id) for rev_id, revno, depth in view_revisions] next_keys = None # Looking up keys in batches of 1000 can cut the time in half, as well as # memory consumption. GraphIndex *does* like to look for a few keys in # parallel, it just doesn't like looking for *lots* of keys in parallel. # TODO: This code needs to be re-evaluated periodically as we tune the # indexing layer. We might consider passing in hints as to the known # access pattern (sparse/clustered, high success rate/low success # rate). 
This particular access is clustered with a low success rate. modified_text_revisions = set() chunk_size = 1000 for start in xrange(0, len(text_keys), chunk_size): next_keys = text_keys[start:start + chunk_size] # Only keep the revision_id portion of the key modified_text_revisions.update( [k[1] for k in get_parent_map(next_keys)]) del text_keys, next_keys result = [] # Track what revisions will merge the current revision, replace entries # with 'None' when they have been added to result current_merge_stack = [None] for info in view_revisions: rev_id, revno, depth = info if depth == len(current_merge_stack): current_merge_stack.append(info) else: del current_merge_stack[depth + 1:] current_merge_stack[-1] = info if rev_id in modified_text_revisions: # This needs to be logged, along with the extra revisions for idx in xrange(len(current_merge_stack)): node = current_merge_stack[idx] if node is not None: if include_merges or node[2] == 0: result.append(node) current_merge_stack[idx] = None return result def reverse_by_depth(merge_sorted_revisions, _depth=0): """Reverse revisions by depth. Revisions with a different depth are sorted as a group with the previous revision of that depth. There may be no topological justification for this, but it looks much nicer. """ # Add a fake revision at start so that we can always attach sub revisions merge_sorted_revisions = [(None, None, _depth)] + merge_sorted_revisions zd_revisions = [] for val in merge_sorted_revisions: if val[2] == _depth: # Each revision at the current depth becomes a chunk grouping all # higher depth revisions. zd_revisions.append([val]) else: zd_revisions[-1].append(val) for revisions in zd_revisions: if len(revisions) > 1: # We have higher depth revisions, let reverse them locally revisions[1:] = reverse_by_depth(revisions[1:], _depth + 1) zd_revisions.reverse() result = [] for chunk in zd_revisions: result.extend(chunk) if _depth == 0: # Top level call, get rid of the fake revisions that have been added result = [r for r in result if r[0] is not None and r[1] is not None] return result class LogRevision(object): """A revision to be logged (by LogFormatter.log_revision). A simple wrapper for the attributes of a revision to be logged. The attributes may or may not be populated, as determined by the logging options and the log formatter capabilities. """ def __init__(self, rev=None, revno=None, merge_depth=0, delta=None, tags=None, diff=None, signature=None): self.rev = rev if revno is None: self.revno = None else: self.revno = str(revno) self.merge_depth = merge_depth self.delta = delta self.tags = tags self.diff = diff self.signature = signature class LogFormatter(object): """Abstract class to display log messages. At a minimum, a derived class must implement the log_revision method. If the LogFormatter needs to be informed of the beginning or end of a log it should implement the begin_log and/or end_log hook methods. A LogFormatter should define the following supports_XXX flags to indicate which LogRevision attributes it supports: - supports_delta must be True if this log formatter supports delta. Otherwise the delta attribute may not be populated. The 'delta_format' attribute describes whether the 'short_status' format (1) or the long one (2) should be used. - supports_merge_revisions must be True if this log formatter supports merge revisions. If not, then only mainline revisions will be passed to the formatter. - preferred_levels is the number of levels this formatter defaults to. 
The default value is zero meaning display all levels. This value is only relevant if supports_merge_revisions is True. - supports_tags must be True if this log formatter supports tags. Otherwise the tags attribute may not be populated. - supports_diff must be True if this log formatter supports diffs. Otherwise the diff attribute may not be populated. - supports_signatures must be True if this log formatter supports GPG signatures. Plugins can register functions to show custom revision properties using the properties_handler_registry. The registered function must respect the following interface description:: def my_show_properties(properties_dict): # code that returns a dict {'name':'value'} of the properties # to be shown """ preferred_levels = 0 def __init__(self, to_file, show_ids=False, show_timezone='original', delta_format=None, levels=None, show_advice=False, to_exact_file=None, author_list_handler=None): """Create a LogFormatter. :param to_file: the file to output to :param to_exact_file: if set, gives an output stream to which non-Unicode diffs are written. :param show_ids: if True, revision-ids are to be displayed :param show_timezone: the timezone to use :param delta_format: the level of delta information to display or None to leave it to the formatter to decide :param levels: the number of levels to display; None or -1 to let the log formatter decide. :param show_advice: whether to show advice at the end of the log or not :param author_list_handler: callable generating a list of authors to display for a given revision """ self.to_file = to_file # 'exact' stream used to show diff, it should print content 'as is' # and should not try to decode/encode it to unicode to avoid bug #328007 if to_exact_file is not None: self.to_exact_file = to_exact_file else: # XXX: somewhat hacky; this assumes it's a codec writer; it's better # for code that expects to get diffs to pass in the exact file # stream self.to_exact_file = getattr(to_file, 'stream', to_file) self.show_ids = show_ids self.show_timezone = show_timezone if delta_format is None: # Ensures backward compatibility delta_format = 2 # long format self.delta_format = delta_format self.levels = levels self._show_advice = show_advice self._merge_count = 0 self._author_list_handler = author_list_handler def get_levels(self): """Get the number of levels to display or 0 for all.""" if getattr(self, 'supports_merge_revisions', False): if self.levels is None or self.levels == -1: self.levels = self.preferred_levels else: self.levels = 1 return self.levels def log_revision(self, revision): """Log a revision. :param revision: The LogRevision to be logged. """ raise NotImplementedError('not implemented in abstract base') def show_advice(self): """Output user advice, if any, when the log is completed.""" if self._show_advice and self.levels == 1 and self._merge_count > 0: advice_sep = self.get_advice_separator() if advice_sep: self.to_file.write(advice_sep) self.to_file.write( "Use --include-merged or -n0 to see merged revisions.\n") def get_advice_separator(self): """Get the text separating the log from the closing advice.""" return '' def short_committer(self, rev): name, address = config.parse_username(rev.committer) if name: return name return address def short_author(self, rev): return self.authors(rev, 'first', short=True, sep=', ') def authors(self, rev, who, short=False, sep=None): """Generate list of authors, taking --authors option into account. 
The caller has to specify the name of a author list handler, as provided by the author list registry, using the ``who`` argument. That name only sets a default, though: when the user selected a different author list generation using the ``--authors`` command line switch, as represented by the ``author_list_handler`` constructor argument, that value takes precedence. :param rev: The revision for which to generate the list of authors. :param who: Name of the default handler. :param short: Whether to shorten names to either name or address. :param sep: What separator to use for automatic concatenation. """ if self._author_list_handler is not None: # The user did specify --authors, which overrides the default author_list_handler = self._author_list_handler else: # The user didn't specify --authors, so we use the caller's default author_list_handler = author_list_registry.get(who) names = author_list_handler(rev) if short: for i in range(len(names)): name, address = config.parse_username(names[i]) if name: names[i] = name else: names[i] = address if sep is not None: names = sep.join(names) return names def merge_marker(self, revision): """Get the merge marker to include in the output or '' if none.""" if len(revision.rev.parent_ids) > 1: self._merge_count += 1 return ' [merge]' else: return '' def show_properties(self, revision, indent): """Displays the custom properties returned by each registered handler. If a registered handler raises an error it is propagated. """ for line in self.custom_properties(revision): self.to_file.write("%s%s\n" % (indent, line)) def custom_properties(self, revision): """Format the custom properties returned by each registered handler. If a registered handler raises an error it is propagated. :return: a list of formatted lines (excluding trailing newlines) """ lines = self._foreign_info_properties(revision) for key, handler in properties_handler_registry.iteritems(): lines.extend(self._format_properties(handler(revision))) return lines def _foreign_info_properties(self, rev): """Custom log displayer for foreign revision identifiers. :param rev: Revision object. 
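custom_properties() above calls every handler in properties_handler_registry with the revision object and expects a dict of name/value strings back. A sketch of registering one; 'flavour' is a made-up revision property and the registration key is arbitrary:

from bzrlib.log import properties_handler_registry

def show_flavour(rev):
    flavour = rev.properties.get('flavour')
    if flavour is None:
        return {}
    return {'flavour': flavour}

properties_handler_registry.register('flavour_properties', show_flavour)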
""" # Revision comes directly from a foreign repository if isinstance(rev, foreign.ForeignRevision): return self._format_properties( rev.mapping.vcs.show_foreign_revid(rev.foreign_revid)) # Imported foreign revision revision ids always contain : if not ":" in rev.revision_id: return [] # Revision was once imported from a foreign repository try: foreign_revid, mapping = \ foreign.foreign_vcs_registry.parse_revision_id(rev.revision_id) except errors.InvalidRevisionId: return [] return self._format_properties( mapping.vcs.show_foreign_revid(foreign_revid)) def _format_properties(self, properties): lines = [] for key, value in properties.items(): lines.append(key + ': ' + value) return lines def show_diff(self, to_file, diff, indent): for l in diff.rstrip().split('\n'): to_file.write(indent + '%s\n' % (l,)) # Separator between revisions in long format _LONG_SEP = '-' * 60 class LongLogFormatter(LogFormatter): supports_merge_revisions = True preferred_levels = 1 supports_delta = True supports_tags = True supports_diff = True supports_signatures = True def __init__(self, *args, **kwargs): super(LongLogFormatter, self).__init__(*args, **kwargs) if self.show_timezone == 'original': self.date_string = self._date_string_original_timezone else: self.date_string = self._date_string_with_timezone def _date_string_with_timezone(self, rev): return format_date(rev.timestamp, rev.timezone or 0, self.show_timezone) def _date_string_original_timezone(self, rev): return format_date_with_offset_in_original_timezone(rev.timestamp, rev.timezone or 0) def log_revision(self, revision): """Log a revision, either merged or not.""" indent = ' ' * revision.merge_depth lines = [_LONG_SEP] if revision.revno is not None: lines.append('revno: %s%s' % (revision.revno, self.merge_marker(revision))) if revision.tags: lines.append('tags: %s' % (', '.join(revision.tags))) if self.show_ids or revision.revno is None: lines.append('revision-id: %s' % (revision.rev.revision_id,)) if self.show_ids: for parent_id in revision.rev.parent_ids: lines.append('parent: %s' % (parent_id,)) lines.extend(self.custom_properties(revision.rev)) committer = revision.rev.committer authors = self.authors(revision.rev, 'all') if authors != [committer]: lines.append('author: %s' % (", ".join(authors),)) lines.append('committer: %s' % (committer,)) branch_nick = revision.rev.properties.get('branch-nick', None) if branch_nick is not None: lines.append('branch nick: %s' % (branch_nick,)) lines.append('timestamp: %s' % (self.date_string(revision.rev),)) if revision.signature is not None: lines.append('signature: ' + revision.signature) lines.append('message:') if not revision.rev.message: lines.append(' (no message)') else: message = revision.rev.message.rstrip('\r\n') for l in message.split('\n'): lines.append(' %s' % (l,)) # Dump the output, appending the delta and diff if requested to_file = self.to_file to_file.write("%s%s\n" % (indent, ('\n' + indent).join(lines))) if revision.delta is not None: # Use the standard status output to display changes from bzrlib.delta import report_delta report_delta(to_file, revision.delta, short_status=False, show_ids=self.show_ids, indent=indent) if revision.diff is not None: to_file.write(indent + 'diff:\n') to_file.flush() # Note: we explicitly don't indent the diff (relative to the # revision information) so that the output can be fed to patch -p0 self.show_diff(self.to_exact_file, revision.diff, indent) self.to_exact_file.flush() def get_advice_separator(self): """Get the text separating the log from the closing 
advice.""" return '-' * 60 + '\n' class ShortLogFormatter(LogFormatter): supports_merge_revisions = True preferred_levels = 1 supports_delta = True supports_tags = True supports_diff = True def __init__(self, *args, **kwargs): super(ShortLogFormatter, self).__init__(*args, **kwargs) self.revno_width_by_depth = {} def log_revision(self, revision): # We need two indents: one per depth and one for the information # relative to that indent. Most mainline revnos are 5 chars or # less while dotted revnos are typically 11 chars or less. Once # calculated, we need to remember the offset for a given depth # as we might be starting from a dotted revno in the first column # and we want subsequent mainline revisions to line up. depth = revision.merge_depth indent = ' ' * depth revno_width = self.revno_width_by_depth.get(depth) if revno_width is None: if revision.revno is None or revision.revno.find('.') == -1: # mainline revno, e.g. 12345 revno_width = 5 else: # dotted revno, e.g. 12345.10.55 revno_width = 11 self.revno_width_by_depth[depth] = revno_width offset = ' ' * (revno_width + 1) to_file = self.to_file tags = '' if revision.tags: tags = ' {%s}' % (', '.join(revision.tags)) to_file.write(indent + "%*s %s\t%s%s%s\n" % (revno_width, revision.revno or "", self.short_author(revision.rev), format_date(revision.rev.timestamp, revision.rev.timezone or 0, self.show_timezone, date_fmt="%Y-%m-%d", show_offset=False), tags, self.merge_marker(revision))) self.show_properties(revision.rev, indent+offset) if self.show_ids or revision.revno is None: to_file.write(indent + offset + 'revision-id:%s\n' % (revision.rev.revision_id,)) if not revision.rev.message: to_file.write(indent + offset + '(no message)\n') else: message = revision.rev.message.rstrip('\r\n') for l in message.split('\n'): to_file.write(indent + offset + '%s\n' % (l,)) if revision.delta is not None: # Use the standard status output to display changes from bzrlib.delta import report_delta report_delta(to_file, revision.delta, short_status=self.delta_format==1, show_ids=self.show_ids, indent=indent + offset) if revision.diff is not None: self.show_diff(self.to_exact_file, revision.diff, ' ') to_file.write('\n') class LineLogFormatter(LogFormatter): supports_merge_revisions = True preferred_levels = 1 supports_tags = True def __init__(self, *args, **kwargs): super(LineLogFormatter, self).__init__(*args, **kwargs) width = terminal_width() if width is not None: # we need one extra space for terminals that wrap on last char width = width - 1 self._max_chars = width def truncate(self, str, max_len): if max_len is None or len(str) <= max_len: return str return str[:max_len-3] + '...' def date_string(self, rev): return format_date(rev.timestamp, rev.timezone or 0, self.show_timezone, date_fmt="%Y-%m-%d", show_offset=False) def message(self, rev): if not rev.message: return '(no message)' else: return rev.message def log_revision(self, revision): indent = ' ' * revision.merge_depth self.to_file.write(self.log_string(revision.revno, revision.rev, self._max_chars, revision.tags, indent)) self.to_file.write('\n') def log_string(self, revno, rev, max_chars, tags=None, prefix=''): """Format log info into one string. Truncate tail of string :param revno: revision number or None. Revision numbers counts from 1. 
:param rev: revision object :param max_chars: maximum length of resulting string :param tags: list of tags or None :param prefix: string to prefix each line :return: formatted truncated string """ out = [] if revno: # show revno only when is not None out.append("%s:" % revno) if max_chars is not None: out.append(self.truncate(self.short_author(rev), (max_chars+3)/4)) else: out.append(self.short_author(rev)) out.append(self.date_string(rev)) if len(rev.parent_ids) > 1: out.append('[merge]') if tags: tag_str = '{%s}' % (', '.join(tags)) out.append(tag_str) out.append(rev.get_summary()) return self.truncate(prefix + " ".join(out).rstrip('\n'), max_chars) class GnuChangelogLogFormatter(LogFormatter): supports_merge_revisions = True supports_delta = True def log_revision(self, revision): """Log a revision, either merged or not.""" to_file = self.to_file date_str = format_date(revision.rev.timestamp, revision.rev.timezone or 0, self.show_timezone, date_fmt='%Y-%m-%d', show_offset=False) committer_str = self.authors(revision.rev, 'first', sep=', ') committer_str = committer_str.replace(' <', ' <') to_file.write('%s %s\n\n' % (date_str,committer_str)) if revision.delta is not None and revision.delta.has_changed(): for c in revision.delta.added + revision.delta.removed + revision.delta.modified: path, = c[:1] to_file.write('\t* %s:\n' % (path,)) for c in revision.delta.renamed: oldpath,newpath = c[:2] # For renamed files, show both the old and the new path to_file.write('\t* %s:\n\t* %s:\n' % (oldpath,newpath)) to_file.write('\n') if not revision.rev.message: to_file.write('\tNo commit message\n') else: message = revision.rev.message.rstrip('\r\n') for l in message.split('\n'): to_file.write('\t%s\n' % (l.lstrip(),)) to_file.write('\n') def line_log(rev, max_chars): lf = LineLogFormatter(None) return lf.log_string(None, rev, max_chars) class LogFormatterRegistry(registry.Registry): """Registry for log formatters""" def make_formatter(self, name, *args, **kwargs): """Construct a formatter from arguments. :param name: Name of the formatter to construct. 'short', 'long' and 'line' are built-in. """ return self.get(name)(*args, **kwargs) def get_default(self, branch): c = branch.get_config_stack() return self.get(c.get('log_format')) log_formatter_registry = LogFormatterRegistry() log_formatter_registry.register('short', ShortLogFormatter, 'Moderately short log format.') log_formatter_registry.register('long', LongLogFormatter, 'Detailed log format.') log_formatter_registry.register('line', LineLogFormatter, 'Log format with one line per revision.') log_formatter_registry.register('gnu-changelog', GnuChangelogLogFormatter, 'Format used by GNU ChangeLog files.') def register_formatter(name, formatter): log_formatter_registry.register(name, formatter) def log_formatter(name, *args, **kwargs): """Construct a formatter from arguments. name -- Name of the formatter to construct; currently 'long', 'short' and 'line' are supported. 
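# --- Usage sketch: constructing a formatter through the registry shown
# above.  The built-in names 'short', 'long', 'line' and 'gnu-changelog'
# come from the register() calls above; a plugin can add its own class with
# register_formatter(name, formatter).  The keyword arguments mirror those
# used elsewhere in this module.
import sys
from bzrlib.log import log_formatter

# log_formatter() looks the name up in log_formatter_registry and calls the
# registered class with the remaining arguments, so this is roughly
# LineLogFormatter(to_file=sys.stdout, show_ids=False, ...):
lf = log_formatter('line', to_file=sys.stdout, show_ids=False,
                   show_timezone='original')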
""" try: return log_formatter_registry.make_formatter(name, *args, **kwargs) except KeyError: raise errors.BzrCommandError(gettext("unknown log formatter: %r") % name) def author_list_all(rev): return rev.get_apparent_authors()[:] def author_list_first(rev): lst = rev.get_apparent_authors() try: return [lst[0]] except IndexError: return [] def author_list_committer(rev): return [rev.committer] author_list_registry = registry.Registry() author_list_registry.register('all', author_list_all, 'All authors') author_list_registry.register('first', author_list_first, 'The first author') author_list_registry.register('committer', author_list_committer, 'The committer') def show_changed_revisions(branch, old_rh, new_rh, to_file=None, log_format='long'): """Show the change in revision history comparing the old revision history to the new one. :param branch: The branch where the revisions exist :param old_rh: The old revision history :param new_rh: The new revision history :param to_file: A file to write the results to. If None, stdout will be used """ if to_file is None: to_file = codecs.getwriter(get_terminal_encoding())(sys.stdout, errors='replace') lf = log_formatter(log_format, show_ids=False, to_file=to_file, show_timezone='original') # This is the first index which is different between # old and new base_idx = None for i in xrange(max(len(new_rh), len(old_rh))): if (len(new_rh) <= i or len(old_rh) <= i or new_rh[i] != old_rh[i]): base_idx = i break if base_idx is None: to_file.write('Nothing seems to have changed\n') return ## TODO: It might be nice to do something like show_log ## and show the merged entries. But since this is the ## removed revisions, it shouldn't be as important if base_idx < len(old_rh): to_file.write('*'*60) to_file.write('\nRemoved Revisions:\n') for i in range(base_idx, len(old_rh)): rev = branch.repository.get_revision(old_rh[i]) lr = LogRevision(rev, i+1, 0, None) lf.log_revision(lr) to_file.write('*'*60) to_file.write('\n\n') if base_idx < len(new_rh): to_file.write('Added Revisions:\n') show_log(branch, lf, None, verbose=False, direction='forward', start_revision=base_idx+1, end_revision=len(new_rh), search=None) def get_history_change(old_revision_id, new_revision_id, repository): """Calculate the uncommon lefthand history between two revisions. :param old_revision_id: The original revision id. :param new_revision_id: The new revision id. :param repository: The repository to use for the calculation. 
return old_history, new_history """ old_history = [] old_revisions = set() new_history = [] new_revisions = set() graph = repository.get_graph() new_iter = graph.iter_lefthand_ancestry(new_revision_id) old_iter = graph.iter_lefthand_ancestry(old_revision_id) stop_revision = None do_old = True do_new = True while do_new or do_old: if do_new: try: new_revision = new_iter.next() except StopIteration: do_new = False else: new_history.append(new_revision) new_revisions.add(new_revision) if new_revision in old_revisions: stop_revision = new_revision break if do_old: try: old_revision = old_iter.next() except StopIteration: do_old = False else: old_history.append(old_revision) old_revisions.add(old_revision) if old_revision in new_revisions: stop_revision = old_revision break new_history.reverse() old_history.reverse() if stop_revision is not None: new_history = new_history[new_history.index(stop_revision) + 1:] old_history = old_history[old_history.index(stop_revision) + 1:] return old_history, new_history def show_branch_change(branch, output, old_revno, old_revision_id): """Show the changes made to a branch. :param branch: The branch to show changes about. :param output: A file-like object to write changes to. :param old_revno: The revno of the old tip. :param old_revision_id: The revision_id of the old tip. """ new_revno, new_revision_id = branch.last_revision_info() old_history, new_history = get_history_change(old_revision_id, new_revision_id, branch.repository) if old_history == [] and new_history == []: output.write('Nothing seems to have changed\n') return log_format = log_formatter_registry.get_default(branch) lf = log_format(show_ids=False, to_file=output, show_timezone='original') if old_history != []: output.write('*'*60) output.write('\nRemoved Revisions:\n') show_flat_log(branch.repository, old_history, old_revno, lf) output.write('*'*60) output.write('\n\n') if new_history != []: output.write('Added Revisions:\n') start_revno = new_revno - len(new_history) + 1 show_log(branch, lf, None, verbose=False, direction='forward', start_revision=start_revno,) def show_flat_log(repository, history, last_revno, lf): """Show a simple log of the specified history. :param repository: The repository to retrieve revisions from. :param history: A list of revision_ids indicating the lefthand history. :param last_revno: The revno of the last revision_id in the history. :param lf: The log formatter to use. """ start_revno = last_revno - len(history) + 1 revisions = repository.get_revisions(history) for i, rev in enumerate(revisions): lr = LogRevision(rev, i + last_revno, 0, None) lf.log_revision(lr) def _get_info_for_log_files(revisionspec_list, file_list, add_cleanup): """Find file-ids and kinds given a list of files and a revision range. We search for files at the end of the range. If not found there, we try the start of the range. :param revisionspec_list: revision range as parsed on the command line :param file_list: the list of paths given on the command line; the first of these can be a branch location or a file path, the remainder must be file paths :param add_cleanup: When the branch returned is read locked, an unlock call will be queued to the cleanup. :return: (branch, info_list, start_rev_info, end_rev_info) where info_list is a list of (relative_path, file_id, kind) tuples where kind is one of values 'directory', 'file', 'symlink', 'tree-reference'. branch will be read-locked. 
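# --- Worked sketch of get_history_change() above ('b', 'old_tip' and
# 'new_tip' are assumed: an already-open bzrlib Branch and two revision ids;
# the letter graph is illustrative only).  With lefthand histories
#     old tip:  A -> B -> C            (old_tip = C)
#     new tip:  A -> B -> D -> E       (new_tip = E)
# both ancestries are walked tip-to-root in parallel until one walker reaches
# a revision the other has already recorded (B here); everything newer than
# that common point is returned, oldest first:
#
#     from bzrlib.log import get_history_change
#     old_history, new_history = get_history_change(old_tip, new_tip,
#                                                   b.repository)
#     # old_history == ['C'],  new_history == ['D', 'E']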
""" from bzrlib.builtins import _get_revision_range tree, b, path = controldir.ControlDir.open_containing_tree_or_branch( file_list[0]) add_cleanup(b.lock_read().unlock) # XXX: It's damn messy converting a list of paths to relative paths when # those paths might be deleted ones, they might be on a case-insensitive # filesystem and/or they might be in silly locations (like another branch). # For example, what should "log bzr://branch/dir/file1 file2" do? (Is # file2 implicitly in the same dir as file1 or should its directory be # taken from the current tree somehow?) For now, this solves the common # case of running log in a nested directory, assuming paths beyond the # first one haven't been deleted ... if tree: relpaths = [path] + tree.safe_relpath_files(file_list[1:]) else: relpaths = [path] + file_list[1:] info_list = [] start_rev_info, end_rev_info = _get_revision_range(revisionspec_list, b, "log") if relpaths in ([], [u'']): return b, [], start_rev_info, end_rev_info if start_rev_info is None and end_rev_info is None: if tree is None: tree = b.basis_tree() tree1 = None for fp in relpaths: file_id = tree.path2id(fp) kind = _get_kind_for_file_id(tree, file_id) if file_id is None: # go back to when time began if tree1 is None: try: rev1 = b.get_rev_id(1) except errors.NoSuchRevision: # No history at all file_id = None kind = None else: tree1 = b.repository.revision_tree(rev1) if tree1: file_id = tree1.path2id(fp) kind = _get_kind_for_file_id(tree1, file_id) info_list.append((fp, file_id, kind)) elif start_rev_info == end_rev_info: # One revision given - file must exist in it tree = b.repository.revision_tree(end_rev_info.rev_id) for fp in relpaths: file_id = tree.path2id(fp) kind = _get_kind_for_file_id(tree, file_id) info_list.append((fp, file_id, kind)) else: # Revision range given. Get the file-id from the end tree. # If that fails, try the start tree. rev_id = end_rev_info.rev_id if rev_id is None: tree = b.basis_tree() else: tree = b.repository.revision_tree(rev_id) tree1 = None for fp in relpaths: file_id = tree.path2id(fp) kind = _get_kind_for_file_id(tree, file_id) if file_id is None: if tree1 is None: rev_id = start_rev_info.rev_id if rev_id is None: rev1 = b.get_rev_id(1) tree1 = b.repository.revision_tree(rev1) else: tree1 = b.repository.revision_tree(rev_id) file_id = tree1.path2id(fp) kind = _get_kind_for_file_id(tree1, file_id) info_list.append((fp, file_id, kind)) return b, info_list, start_rev_info, end_rev_info def _get_kind_for_file_id(tree, file_id): """Return the kind of a file-id or None if it doesn't exist.""" if file_id is not None: return tree.kind(file_id) else: return None properties_handler_registry = registry.Registry() # Use the properties handlers to print out bug information if available def _bugs_properties_handler(revision): if revision.properties.has_key('bugs'): bug_lines = revision.properties['bugs'].split('\n') bug_rows = [line.split(' ', 1) for line in bug_lines] fixed_bug_urls = [row[0] for row in bug_rows if len(row) > 1 and row[1] == 'fixed'] if fixed_bug_urls: return {ngettext('fixes bug', 'fixes bugs', len(fixed_bug_urls)):\ ' '.join(fixed_bug_urls)} return {} properties_handler_registry.register('bugs_properties_handler', _bugs_properties_handler) # adapters which revision ids to log are filtered. When log is called, the # log_rev_iterator is adapted through each of these factory methods. # Plugins are welcome to mutate this list in any way they like - as long # as the overall behaviour is preserved. 
At this point there is no extensible # mechanism for getting parameters to each factory method, and until there is # this won't be considered a stable api. log_adapters = [ # core log logic _make_batch_filter, # read revision objects _make_revision_objects, # filter on log messages _make_search_filter, # generate deltas for things we will show _make_delta_filter ] bzr-2.7.0/bzrlib/lru_cache.py0000644000000000000000000002733511673635356014257 0ustar 00000000000000# Copyright (C) 2006, 2008, 2009 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """A simple least-recently-used (LRU) cache.""" from __future__ import absolute_import from bzrlib import ( symbol_versioning, trace, ) _null_key = object() class _LRUNode(object): """This maintains the linked-list which is the lru internals.""" __slots__ = ('prev', 'next_key', 'key', 'value') def __init__(self, key, value): self.prev = None self.next_key = _null_key self.key = key self.value = value def __repr__(self): if self.prev is None: prev_key = None else: prev_key = self.prev.key return '%s(%r n:%r p:%r)' % (self.__class__.__name__, self.key, self.next_key, prev_key) class LRUCache(object): """A class which manages a cache of entries, removing unused ones.""" def __init__(self, max_cache=100, after_cleanup_count=None): self._cache = {} # The "HEAD" of the lru linked list self._most_recently_used = None # The "TAIL" of the lru linked list self._least_recently_used = None self._update_max_cache(max_cache, after_cleanup_count) def __contains__(self, key): return key in self._cache def __getitem__(self, key): cache = self._cache node = cache[key] # Inlined from _record_access to decrease the overhead of __getitem__ # We also have more knowledge about structure if __getitem__ is # succeeding, then we know that self._most_recently_used must not be # None, etc. mru = self._most_recently_used if node is mru: # Nothing to do, this node is already at the head of the queue return node.value # Remove this node from the old location node_prev = node.prev next_key = node.next_key # benchmarking shows that the lookup of _null_key in globals is faster # than the attribute lookup for (node is self._least_recently_used) if next_key is _null_key: # 'node' is the _least_recently_used, because it doesn't have a # 'next' item. So move the current lru to the previous node. 
self._least_recently_used = node_prev else: node_next = cache[next_key] node_next.prev = node_prev node_prev.next_key = next_key # Insert this node at the front of the list node.next_key = mru.key mru.prev = node self._most_recently_used = node node.prev = None return node.value def __len__(self): return len(self._cache) @symbol_versioning.deprecated_method( symbol_versioning.deprecated_in((2, 5, 0))) def add(self, key, value, cleanup=None): if cleanup is not None: raise ValueError("Per-node cleanup functions no longer supported") return self.__setitem__(key, value) def __setitem__(self, key, value): """Add a new value to the cache""" if key is _null_key: raise ValueError('cannot use _null_key as a key') if key in self._cache: node = self._cache[key] node.value = value self._record_access(node) else: node = _LRUNode(key, value) self._cache[key] = node self._record_access(node) if len(self._cache) > self._max_cache: # Trigger the cleanup self.cleanup() def cache_size(self): """Get the number of entries we will cache.""" return self._max_cache def get(self, key, default=None): node = self._cache.get(key, None) if node is None: return default self._record_access(node) return node.value def keys(self): """Get the list of keys currently cached. Note that values returned here may not be available by the time you request them later. This is simply meant as a peak into the current state. :return: An unordered list of keys that are currently cached. """ return self._cache.keys() def as_dict(self): """Get a new dict with the same key:value pairs as the cache""" return dict((k, n.value) for k, n in self._cache.iteritems()) items = symbol_versioning.deprecated_method( symbol_versioning.deprecated_in((2, 5, 0)))(as_dict) def cleanup(self): """Clear the cache until it shrinks to the requested size. This does not completely wipe the cache, just makes sure it is under the after_cleanup_count. """ # Make sure the cache is shrunk to the correct size while len(self._cache) > self._after_cleanup_count: self._remove_lru() def _record_access(self, node): """Record that key was accessed.""" # Move 'node' to the front of the queue if self._most_recently_used is None: self._most_recently_used = node self._least_recently_used = node return elif node is self._most_recently_used: # Nothing to do, this node is already at the head of the queue return # We've taken care of the tail pointer, remove the node, and insert it # at the front # REMOVE if node is self._least_recently_used: self._least_recently_used = node.prev if node.prev is not None: node.prev.next_key = node.next_key if node.next_key is not _null_key: node_next = self._cache[node.next_key] node_next.prev = node.prev # INSERT node.next_key = self._most_recently_used.key self._most_recently_used.prev = node self._most_recently_used = node node.prev = None def _remove_node(self, node): if node is self._least_recently_used: self._least_recently_used = node.prev self._cache.pop(node.key) # If we have removed all entries, remove the head pointer as well if self._least_recently_used is None: self._most_recently_used = None if node.prev is not None: node.prev.next_key = node.next_key if node.next_key is not _null_key: node_next = self._cache[node.next_key] node_next.prev = node.prev # And remove this node's pointers node.prev = None node.next_key = _null_key def _remove_lru(self): """Remove one entry from the lru, and handle consequences. If there are no more references to the lru, then this entry should be removed from the cache. 
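# --- Usage sketch (not part of this module): the LRUCache defined above
# behaves like a bounded dict.  max_cache is the hard limit that triggers a
# cleanup; after_cleanup_count is the size the cache is trimmed back to.
from bzrlib.lru_cache import LRUCache

cache = LRUCache(max_cache=3, after_cleanup_count=2)
cache['a'] = 1
cache['b'] = 2
cache['c'] = 3
cache['a']                      # touching 'a' makes it most recently used
cache['d'] = 4                  # exceeds max_cache, so cleanup() runs
assert len(cache) == 2          # trimmed back to after_cleanup_count
assert 'a' in cache and 'd' in cache
assert cache.get('b') is None   # least recently used entries were evicted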
""" self._remove_node(self._least_recently_used) def clear(self): """Clear out all of the cache.""" # Clean up in LRU order while self._cache: self._remove_lru() def resize(self, max_cache, after_cleanup_count=None): """Change the number of entries that will be cached.""" self._update_max_cache(max_cache, after_cleanup_count=after_cleanup_count) def _update_max_cache(self, max_cache, after_cleanup_count=None): self._max_cache = max_cache if after_cleanup_count is None: self._after_cleanup_count = self._max_cache * 8 / 10 else: self._after_cleanup_count = min(after_cleanup_count, self._max_cache) self.cleanup() class LRUSizeCache(LRUCache): """An LRUCache that removes things based on the size of the values. This differs in that it doesn't care how many actual items there are, it just restricts the cache to be cleaned up after so much data is stored. The size of items added will be computed using compute_size(value), which defaults to len() if not supplied. """ def __init__(self, max_size=1024*1024, after_cleanup_size=None, compute_size=None): """Create a new LRUSizeCache. :param max_size: The max number of bytes to store before we start clearing out entries. :param after_cleanup_size: After cleaning up, shrink everything to this size. :param compute_size: A function to compute the size of the values. We use a function here, so that you can pass 'len' if you are just using simple strings, or a more complex function if you are using something like a list of strings, or even a custom object. The function should take the form "compute_size(value) => integer". If not supplied, it defaults to 'len()' """ self._value_size = 0 self._compute_size = compute_size if compute_size is None: self._compute_size = len self._update_max_size(max_size, after_cleanup_size=after_cleanup_size) LRUCache.__init__(self, max_cache=max(int(max_size/512), 1)) def __setitem__(self, key, value): """Add a new value to the cache""" if key is _null_key: raise ValueError('cannot use _null_key as a key') node = self._cache.get(key, None) value_len = self._compute_size(value) if value_len >= self._after_cleanup_size: # The new value is 'too big to fit', as it would fill up/overflow # the cache all by itself trace.mutter('Adding the key %r to an LRUSizeCache failed.' ' value %d is too big to fit in a the cache' ' with size %d %d', key, value_len, self._after_cleanup_size, self._max_size) if node is not None: # We won't be replacing the old node, so just remove it self._remove_node(node) return if node is None: node = _LRUNode(key, value) self._cache[key] = node else: self._value_size -= self._compute_size(node.value) self._value_size += value_len self._record_access(node) if self._value_size > self._max_size: # Time to cleanup self.cleanup() def cleanup(self): """Clear the cache until it shrinks to the requested size. This does not completely wipe the cache, just makes sure it is under the after_cleanup_size. 
""" # Make sure the cache is shrunk to the correct size while self._value_size > self._after_cleanup_size: self._remove_lru() def _remove_node(self, node): self._value_size -= self._compute_size(node.value) LRUCache._remove_node(self, node) def resize(self, max_size, after_cleanup_size=None): """Change the number of bytes that will be cached.""" self._update_max_size(max_size, after_cleanup_size=after_cleanup_size) max_cache = max(int(max_size/512), 1) self._update_max_cache(max_cache) def _update_max_size(self, max_size, after_cleanup_size=None): self._max_size = max_size if after_cleanup_size is None: self._after_cleanup_size = self._max_size * 8 / 10 else: self._after_cleanup_size = min(after_cleanup_size, self._max_size) bzr-2.7.0/bzrlib/lsprof.py0000644000000000000000000002663511673403246013630 0ustar 00000000000000# this is copied from the lsprof distro because somehow # it is not installed by distutils # I made one modification to profile so that it returns a pair # instead of just the Stats object from __future__ import absolute_import import cPickle import os import sys import thread import threading from _lsprof import Profiler, profiler_entry from bzrlib import errors __all__ = ['profile', 'Stats'] def profile(f, *args, **kwds): """Run a function profile. Exceptions are not caught: If you need stats even when exceptions are to be raised, pass in a closure that will catch the exceptions and transform them appropriately for your driver function. Important caveat: only one profile can execute at a time. See BzrProfiler for details. :return: The functions return value and a stats object. """ profiler = BzrProfiler() profiler.start() try: ret = f(*args, **kwds) finally: stats = profiler.stop() return ret, stats class BzrProfiler(object): """Bzr utility wrapper around Profiler. For most uses the module level 'profile()' function will be suitable. However profiling when a simple wrapped function isn't available may be easier to accomplish using this class. To use it, create a BzrProfiler and call start() on it. Some arbitrary time later call stop() to stop profiling and retrieve the statistics from the code executed in the interim. Note that profiling involves a threading.Lock around the actual profiling. This is needed because profiling involves global manipulation of the python interpreter state. As such you cannot perform multiple profiles at once. Trying to do so will lock out the second profiler unless the global bzrlib.lsprof.BzrProfiler.profiler_block is set to 0. Setting it to 0 will cause profiling to fail rather than blocking. """ profiler_block = 1 """Serialise rather than failing to profile concurrent profile requests.""" profiler_lock = threading.Lock() """Global lock used to serialise profiles.""" def start(self): """Start profiling. This hooks into threading and will record all calls made until stop() is called. """ self._g_threadmap = {} self.p = Profiler() permitted = self.__class__.profiler_lock.acquire( self.__class__.profiler_block) if not permitted: raise errors.InternalBzrError(msg="Already profiling something") try: self.p.enable(subcalls=True) threading.setprofile(self._thread_profile) except: self.__class__.profiler_lock.release() raise def stop(self): """Stop profiling. This unhooks from threading and cleans up the profiler, returning the gathered Stats object. :return: A bzrlib.lsprof.Stats object. 
""" try: self.p.disable() for pp in self._g_threadmap.values(): pp.disable() threading.setprofile(None) p = self.p self.p = None threads = {} for tid, pp in self._g_threadmap.items(): threads[tid] = Stats(pp.getstats(), {}) self._g_threadmap = None return Stats(p.getstats(), threads) finally: self.__class__.profiler_lock.release() def _thread_profile(self, f, *args, **kwds): # we lose the first profile point for a new thread in order to # trampoline a new Profile object into place thr = thread.get_ident() self._g_threadmap[thr] = p = Profiler() # this overrides our sys.setprofile hook: p.enable(subcalls=True, builtins=True) class Stats(object): """Wrapper around the collected data. A Stats instance is created when the profiler finishes. Normal usage is to use save() to write out the data to a file, or pprint() to write human-readable information to the command line. """ def __init__(self, data, threads): self.data = data self.threads = threads def sort(self, crit="inlinetime"): """Sort the data by the supplied critera. :param crit: the data attribute used as the sort key.""" if crit not in profiler_entry.__dict__: raise ValueError, "Can't sort by %s" % crit self.data.sort(lambda b, a: cmp(getattr(a, crit), getattr(b, crit))) for e in self.data: if e.calls: e.calls.sort(lambda b, a: cmp(getattr(a, crit), getattr(b, crit))) def pprint(self, top=None, file=None): """Pretty-print the data as plain text for human consumption. :param top: only output the top n entries. The default value of None means output all data. :param file: the output file; if None, output will default to stdout.""" if file is None: file = sys.stdout d = self.data if top is not None: d = d[:top] cols = "% 12s %12s %11.4f %11.4f %s\n" hcols = "% 12s %12s %12s %12s %s\n" cols2 = "+%12s %12s %11.4f %11.4f + %s\n" file.write(hcols % ("CallCount", "Recursive", "Total(ms)", "Inline(ms)", "module:lineno(function)")) for e in d: file.write(cols % (e.callcount, e.reccallcount, e.totaltime, e.inlinetime, label(e.code))) if e.calls: for se in e.calls: file.write(cols % ("+%s" % se.callcount, se.reccallcount, se.totaltime, se.inlinetime, "+%s" % label(se.code))) def freeze(self): """Replace all references to code objects with string descriptions; this makes it possible to pickle the instance.""" # this code is probably rather ickier than it needs to be! for i in range(len(self.data)): e = self.data[i] if not isinstance(e.code, str): self.data[i] = type(e)((label(e.code),) + e[1:]) if e.calls: for j in range(len(e.calls)): se = e.calls[j] if not isinstance(se.code, str): e.calls[j] = type(se)((label(se.code),) + se[1:]) for s in self.threads.values(): s.freeze() def calltree(self, file): """Output profiling data in calltree format (for KCacheGrind).""" _CallTreeFilter(self.data).output(file) def save(self, filename, format=None): """Save profiling data to a file. :param filename: the name of the output file :param format: 'txt' for a text representation; 'callgrind' for calltree format; otherwise a pickled Python object. A format of None indicates that the format to use is to be found from the filename. If the name starts with callgrind.out, callgrind format is used otherwise the format is given by the filename extension. 
""" if format is None: basename = os.path.basename(filename) if basename.startswith('callgrind.out'): format = "callgrind" else: ext = os.path.splitext(filename)[1] if len(ext) > 1: format = ext[1:] outfile = open(filename, 'wb') try: if format == "callgrind": self.calltree(outfile) elif format == "txt": self.pprint(file=outfile) else: self.freeze() cPickle.dump(self, outfile, 2) finally: outfile.close() class _CallTreeFilter(object): """Converter of a Stats object to input suitable for KCacheGrind. This code is taken from http://ddaa.net/blog/python/lsprof-calltree with the changes made by J.P. Calderone and Itamar applied. Note that isinstance(code, str) needs to be used at times to determine if the code object is actually an external code object (with a filename, etc.) or a Python built-in. """ def __init__(self, data): self.data = data self.out_file = None def output(self, out_file): self.out_file = out_file out_file.write('events: Ticks\n') self._print_summary() for entry in self.data: self._entry(entry) def _print_summary(self): max_cost = 0 for entry in self.data: totaltime = int(entry.totaltime * 1000) max_cost = max(max_cost, totaltime) self.out_file.write('summary: %d\n' % (max_cost,)) def _entry(self, entry): out_file = self.out_file code = entry.code inlinetime = int(entry.inlinetime * 1000) #out_file.write('ob=%s\n' % (code.co_filename,)) if isinstance(code, str): out_file.write('fi=~\n') else: out_file.write('fi=%s\n' % (code.co_filename,)) out_file.write('fn=%s\n' % (label(code, True),)) if isinstance(code, str): out_file.write('0 %s\n' % (inlinetime,)) else: out_file.write('%d %d\n' % (code.co_firstlineno, inlinetime)) # recursive calls are counted in entry.calls if entry.calls: calls = entry.calls else: calls = [] if isinstance(code, str): lineno = 0 else: lineno = code.co_firstlineno for subentry in calls: self._subentry(lineno, subentry) out_file.write('\n') def _subentry(self, lineno, subentry): out_file = self.out_file code = subentry.code totaltime = int(subentry.totaltime * 1000) #out_file.write('cob=%s\n' % (code.co_filename,)) if isinstance(code, str): out_file.write('cfi=~\n') out_file.write('cfn=%s\n' % (label(code, True),)) out_file.write('calls=%d 0\n' % (subentry.callcount,)) else: out_file.write('cfi=%s\n' % (code.co_filename,)) out_file.write('cfn=%s\n' % (label(code, True),)) out_file.write('calls=%d %d\n' % ( subentry.callcount, code.co_firstlineno)) out_file.write('%d %d\n' % (lineno, totaltime)) _fn2mod = {} def label(code, calltree=False): if isinstance(code, str): return code try: mname = _fn2mod[code.co_filename] except KeyError: for k, v in sys.modules.items(): if v is None: continue if getattr(v, '__file__', None) is None: continue if not isinstance(v.__file__, str): continue if v.__file__.startswith(code.co_filename): mname = _fn2mod[code.co_filename] = k break else: mname = _fn2mod[code.co_filename] = '<%s>'%code.co_filename if calltree: return '%s %s:%d' % (code.co_name, mname, code.co_firstlineno) else: return '%s:%d(%s)' % (mname, code.co_firstlineno, code.co_name) if __name__ == '__main__': import os sys.argv = sys.argv[1:] if not sys.argv: sys.stderr.write("usage: lsprof.py