gfs2-utils/.gitignore0000664000175000017500000000141112173461754013526 0ustar andyandy*.swp Makefile.in aclocal.m4 autoconf autoheader autom4te.cache automake compile configure config.guess config.log config.sub config.status config.rpath Makefile depcomp install-sh libtoolize ltmain.sh libtool make/stamp-h1 m4 make/clusterautoconfig.h* missing ylwrap cscope.out .gdb_history *.pc .deps .libs *.o *.la *.lo gfs2/convert/gfs2_convert gfs2/edit/gfs2_edit gfs2/libgfs2/gfs2l gfs2/libgfs2/parser.c gfs2/libgfs2/parser.h gfs2/libgfs2/lexer.c gfs2/libgfs2/lexer.h gfs2/fsck/fsck.gfs2 gfs2/mkfs/mkfs.gfs2 gfs2/tune/tunegfs2 test-driver tests/check_libgfs2 tests/testvol tests/tests.log ABOUT-NLS po/Makevars.template po/POTFILES po/stamp-po po/remove-potcdate.sed po/Makefile.in.in po/Rules-quot po/boldquot.sed po/insert-header.sin po/quot.sed po/remove-potcdate.sin gfs2-utils/Makefile.am0000664000175000017500000000070712154127655013577 0ustar andyandyEXTRA_DIST = autogen.sh README.build AUTOMAKE_OPTIONS = foreign MAINTAINERCLEANFILES = Makefile.in aclocal.m4 configure depcomp \ config.guess config.sub missing install-sh \ autoheader automake autoconf libtool libtoolize \ ltmain.sh compile make/clusterautoconfig.h.in \ make/clusterautoconfig.h.in~ noinst_HEADERS = make/copyright.cf ACLOCAL_AMFLAGS = -I m4 SUBDIRS = po gfs2 doc tests maintainer-clean-local: rm -rf m4 gfs2-utils/README.build0000664000175000017500000000075512173454630013521 0ustar andyandy To build this source tree, you will need: - automake - GNU make - GCC tool chain Plus the following libraries: ncurses (for gfs2_edit) gettext bison flex zlib libblkid check (optional, enables the test suite) To build gfs2-utils, run the following commands: ./autogen.sh ./configure make To run the test suite: make check To install gfs2-utils, run: make install See also doc/README.contributing for details on submitting patches. 
gfs2-utils/autogen.sh0000775000175000017500000000020312110647577013535 0ustar andyandy#!/bin/sh # Run this to generate all the initial makefiles, etc. mkdir -p m4 autoreconf -i -v && echo Now run ./configure and make gfs2-utils/configure.ac0000664000175000017500000001245212154127655014031 0ustar andyandy # Process this file with autoconf to produce a configure script. AC_PREREQ([2.63]) AC_INIT([gfs2-utils], [master], [linux-cluster@redhat.com]) AM_INIT_AUTOMAKE([-Wno-portability]) AM_SILENT_RULES([yes]) LT_PREREQ([2.2.6]) LT_INIT AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_SRCDIR([gfs2/libgfs2/libgfs2.h]) AC_CONFIG_HEADERS([make/clusterautoconfig.h]) AC_CANONICAL_HOST AC_PROG_LIBTOOL AC_LANG([C]) #i18n support AM_GNU_GETTEXT([external]) AM_GNU_GETTEXT_VERSION([0.18]) # Sanitize path if test "$prefix" = "NONE"; then prefix="/usr" if test "$localstatedir" = "\${prefix}/var"; then localstatedir="/var" fi if test "$sysconfdir" = "\${prefix}/etc"; then sysconfdir="/etc" fi if test "$libdir" = "\${exec_prefix}/lib"; then if test -e /usr/lib64; then libdir="/usr/lib64" else libdir="/usr/lib" fi fi fi case $exec_prefix in NONE) exec_prefix=$prefix;; prefix) exec_prefix=$prefix;; esac # Checks for programs. # check stolen from gnulib/m4/gnu-make.m4 if ! ${MAKE-make} --version /cannot/make/this >/dev/null 2>&1; then AC_MSG_ERROR([you don't seem to have GNU make; it is required]) fi AC_PROG_CC AM_PROG_CC_C_O AC_PROG_LN_S AC_PROG_INSTALL AC_PROG_MAKE_SET AC_PROG_LEX AC_CHECK_PROG([YACC], [bison], [bison -y]) test x"$YACC" = x && AC_MSG_ERROR([bison not found]) ## local helper functions # this function checks if CC support options passed as # args. Global CFLAGS are ignored during this test. 
# cc_supports_flag FLAG...
# Shell helper: try to compile a trivial program with CFLAGS set to the
# given arguments only. The assignment is local, so the global CFLAGS
# value is not used for this probe and is left unchanged afterwards.
# Returns 0 when the compile succeeds and 1 when it fails.
cc_supports_flag() {
	local CFLAGS="$@"
	AC_MSG_CHECKING([whether $CC supports "$@"])
	AC_COMPILE_IFELSE([AC_LANG_SOURCE([int main(){return 0;}])],
			  [RC=0; AC_MSG_RESULT([yes])],
			  [RC=1; AC_MSG_RESULT([no])])
	return $RC
}

# check_lib_no_libs LIBRARY FUNCTION
# Shell helper: verify that LIBRARY provides FUNCTION by using AC_CHECK_LIB,
# aborting configure when it does not. LIBS is then reset from the value
# AC_CHECK_LIB saved before the check, so the library is not appended to
# the global link line and libtool does not link everything with everything.
check_lib_no_libs() {
	AC_CHECK_LIB([$1], [$2],,
		     [AC_MSG_ERROR([Unable to find $1 library])])
	LIBS=$ac_check_lib_save_LIBS
}

# local options
# NOTE(review): the third argument runs only when the option is given on
# the command line and sets a shell variable named "default" that is not
# referenced anywhere else in the visible script; the later debug test
# reads enable_debug instead.
AC_ARG_ENABLE([debug],
	[  --enable-debug          enable debug build. ],
	[ default="no" ])

# We use the Check framework for unit tests. It is optional: the result is
# only recorded in have_check and drives the BUILD_TESTS conditional.
PKG_CHECK_MODULES([check], [check >= 0.9.8],
		  [have_check=yes],
		  [have_check=no])
AM_CONDITIONAL([BUILD_TESTS], [test "x$have_check" = "xyes"])

# Required libraries that ship pkg-config files.
PKG_CHECK_MODULES([zlib],[zlib])
PKG_CHECK_MODULES([blkid],[blkid])

# old versions of ncurses don't ship pkg-config files, so fall back to a
# plain link test and a hard-coded -lncurses when pkg-config gave nothing.
PKG_CHECK_MODULES([ncurses],[ncurses],,
		  [check_lib_no_libs ncurses printw])

if test -z "$ncurses_CFLAGS" && test -z "$ncurses_LIBS"; then
	ncurses_LIBS=-lncurses
fi

# external libs (no pkgconfig)
check_lib_no_libs pthread pthread_mutex_lock

# Checks for header files.
# The first group is probed without a failure action; a missing header
# there does not stop configure.
AC_CHECK_HEADERS([arpa/inet.h fcntl.h inttypes.h libintl.h limits.h locale.h mntent.h netdb.h netinet/in.h stddef.h stdint.h stdlib.h string.h sys/file.h sys/ioctl.h sys/mount.h sys/param.h sys/socket.h sys/time.h sys/vfs.h syslog.h termios.h unistd.h])

# Kernel headers are mandatory: any missing one aborts configure.
AC_CHECK_HEADERS([linux/dlmconstants.h linux/limits.h linux/types.h linux/netlink.h linux/fs.h],,
		 [AC_MSG_ERROR([Unable to find all required kernel headers.])])

# The gfs2 on-disk header must exist and its superblock structure must have
# the sb_uuid member. The member probe has to include the header that
# declares struct gfs2_sb, otherwise the test program can never compile.
AC_CHECK_HEADERS([linux/gfs2_ondisk.h],
		 [AC_CHECK_MEMBERS([struct gfs2_sb.sb_uuid],,
			[AC_MSG_ERROR([Unable to find gfs2 uuid support in your headers. Please update your kernel headers to a more recent version])],
			[#include <linux/gfs2_ondisk.h>])],
		 [AC_MSG_ERROR([Unable to find required kernel headers.])])

# Checks for typedefs, structures, and compiler characteristics.
AC_C_INLINE AC_TYPE_INT64_T AC_TYPE_MODE_T AC_TYPE_OFF_T AC_TYPE_PID_T AC_TYPE_SIZE_T AC_TYPE_SSIZE_T AC_STRUCT_ST_BLOCKS AC_TYPE_UINT16_T AC_TYPE_UINT32_T AC_TYPE_UINT64_T AC_TYPE_UINT8_T # Checks for library functions. AC_FUNC_CHOWN AC_FUNC_FORK AC_FUNC_LSTAT_FOLLOWS_SLASHED_SYMLINK AC_HEADER_MAJOR AC_FUNC_MALLOC AC_FUNC_REALLOC AC_CHECK_FUNCS([ftruncate gettimeofday memset realpath rmdir select setlocale socket strcasecmp strchr strdup strerror strstr]) ## *FLAGS handling ENV_CFLAGS="$CFLAGS" ENV_CPPFLAGS="$CPPFLAGS" ENV_LDFLAGS="$LDFLAGS" # debug build stuff if test "x${enable_debug}" = xyes; then AC_DEFINE_UNQUOTED([DEBUG], [1], [Compiling Debugging code]) OPT_CFLAGS="-O0" else OPT_CFLAGS="-O2" fi # gdb flags if test "x${GCC}" = xyes; then GDB_FLAGS="-ggdb3" else GDB_FLAGS="-g" fi # extra warnings EXTRA_WARNINGS="" WARNLIST=" all shadow missing-prototypes missing-declarations strict-prototypes declaration-after-statement pointer-arith write-strings cast-align bad-function-cast missing-format-attribute format=2 format-security format-nonliteral no-long-long unsigned-char gnu89-inline no-strict-aliasing " for j in $WARNLIST; do if cc_supports_flag -W$j; then EXTRA_WARNINGS="$EXTRA_WARNINGS -W$j"; fi done CFLAGS="$ENV_CFLAGS $OPT_CFLAGS $GDB_FLAGS \ $EXTRA_WARNINGS $WERROR_CFLAGS" CPPFLAGS="-I\$(top_builddir)/make -I\$(top_srcdir)/make \ -I. 
$ENV_CPPFLAGS" LDFLAGS="$ENV_LDFLAGS" AC_CONFIG_FILES([Makefile gfs2/Makefile gfs2/include/Makefile gfs2/libgfs2/Makefile gfs2/convert/Makefile gfs2/edit/Makefile gfs2/fsck/Makefile gfs2/mkfs/Makefile gfs2/tune/Makefile gfs2/man/Makefile gfs2/scripts/Makefile doc/Makefile tests/Makefile po/Makefile.in ]) AC_OUTPUT test x"$have_check" = "xyes" || AC_MSG_NOTICE([package 'check' not found; unit tests will not be built]) gfs2-utils/doc/COPYING.applications0000664000175000017500000004310312110647577016027 0ustar andyandy GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. 
These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. 
(Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. 
c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. 
The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. 
If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. <signature of Ty Coon>, 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. gfs2-utils/doc/COPYING.libraries0000664000175000017500000006363712110647577015333 0ustar andyandy GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999 Copyright (C) 1991, 1999 Free Software Foundation, Inc. 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.] Preamble The licenses for most software are designed to take away your freedom to share and change it. 
By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. When we speak of free software, we are referring to freedom of use, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. 
To protect each distributor, we want to make it very clear that there is no warranty for the free library. Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. 
For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. GNU LESSER GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. 
The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. 
You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. 
Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. 
A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. 6. 
As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. 
c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. 
b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. 
The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. 14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Libraries If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that everyone can redistribute and change. You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the ordinary General Public License). To apply these terms, attach the following notices to the library. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the library, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the library `Frob' (a library for tweaking knobs) written by James Random Hacker. , 1 April 1990 Ty Coon, President of Vice That's all there is to it! gfs2-utils/doc/COPYRIGHT0000664000175000017500000000315612110647577013606 0ustar andyandyUnless specified otherwise in the "exceptions section" below: Copyright (C) 1997-2003 Sistina Software, Inc. All rights reserved. Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved. Exceptions: gfs2/man/gfs2_mount.8: Portions copyright (C) 2001-2003 The OpenGFS2 Project Portions copyright (C) 2004 Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved. Authors as known by current RCS as of the time of writing: Abhijith Das Adam Manthei A. J. Lewis Alasdair G. Kergon Andrew Price Benjamin Marzinski Bob Peterson Chris Feist Christine Caulfield Daniel Phillips David Teigland Fabio M. Di Nitto James Parsons Joel Becker Jonathan Brassow jparsons Ken Preslan Lon Hohberger Marc - A. 
Dahlhaus Marek 'marx' Grac Mark Hlawatschek Michael Conrad Tadpol Tilstra Patrick Caulfield Robert Peterson Ross Vandegrift Ryan McCabe Ryan O'Hara Stanko Kupcevic Steven Whitehouse Wendy Cheng gfs2-utils/doc/Makefile.am0000664000175000017500000000031312173461546014336 0ustar andyandyMAINTAINERCLEANFILES = Makefile.in dist_doc_DATA = gfs2.txt \ journaling.txt \ COPYING.applications \ COPYING.libraries \ COPYRIGHT \ README.contributing \ README.licence gfs2-utils/doc/README.contributing0000664000175000017500000000375312110647577015704 0ustar andyandyContributing to gfs2-utils -------------------------- Here are some brief guidelines to follow when contributing to gfs2-utils. Translations ------------ We use the Transifex translation service: https://transifex.com/projects/p/gfs2-utils/ See the documentation there for submitting translations. Patches ------- We don't dictate any particular coding style but please try to use a style consistent with the existing code. If in doubt, the Linux kernel coding style document is a good guideline: http://www.kernel.org/doc/Documentation/CodingStyle We use git for managing our source code and we assume here that you're familiar with git. Patches should apply cleanly to the latest master branch of gfs2-utils.git http://git.fedorahosted.org/cgit/gfs2-utils.git For ease of review and maintenance each of your patches should address a single issue and if there are multiple issues please consider spreading your work over several patches. Ideally none of the individual patches should break the build. We value good commit logs, which should be of the form: component: short patch summary Longer description wrapped at approx. 72 columns explaining the problem the patch addresses and how the patch addresses it. Signed-off-by: Your Name The "component" should be the name of the tool or the part of the code which the patch touches. As we share a mailing list with several projects it should make clear that it's a gfs2-utils patch. 
Some examples: Bad short logs: Fix a bug Add a test Good short logs: fsck.gfs2: Fix a null pointer dereference in foo gfs2-utils: Add a test for lgfs2_do_stuff Be sure to reference any relevant bug reports in your long description, e.g. Ref: rhbz#012345 Fixes: rhbz#98765 Please send patches to . We recommend using `git format-patch' to generate patch emails from your commits and `git send-email' for sending them to the list. See the git documentation for details. gfs2-utils/doc/README.licence0000664000175000017500000000253612110647577014575 0ustar andyandyThe Red Hat Cluster is a collection of free software built on top of different libraries and applications. For a detailed list of authors and copyright holders, please check the included COPYRIGHT file. Libraries: You can redistribute them and/or modify them under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The libraries are distributed in the hope that they will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. Applications: You can redistribute them and/or modify them under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. The applications are distributed in the hope that they will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. A copy of each license is included for your convenience in COPYING.applications and COPYING.libraries. If missing, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
gfs2-utils/doc/cluster.fig0000664000175000017500000001131612110647577014460 0ustar andyandy#FIG 3.2 Produced by xfig version 3.2.5a Landscape Center Inches Letter 100.00 Single -2 1200 2 5 1 0 1 0 7 50 -1 -1 0.000 0 1 1 1 7332.065 8049.457 3900 5025 3075 6375 2775 7650 1 1 1.00 60.00 120.00 1 0 1.00 60.00 120.00 5 1 0 1 0 7 50 -1 -1 0.000 0 1 1 0 2640.072 5956.954 3300 7650 4275 6750 4200 5025 1 1 1.00 60.00 120.00 5 1 0 1 0 7 50 -1 -1 0.000 0 0 1 1 2812.500 5137.500 2175 4350 2775 4125 3450 4350 1 1 1.00 60.00 120.00 1 1 1.00 60.00 120.00 5 1 0 1 0 7 50 -1 -1 0.000 0 0 1 1 3770.455 7155.682 1650 5925 2700 4950 3450 4725 1 1 1.00 60.00 120.00 1 1 1.00 60.00 120.00 5 1 0 1 0 7 50 -1 -1 0.000 0 1 0 1 9962.903 13539.746 9075 5025 4950 6600 3750 7650 1 1 1.00 60.00 120.00 5 1 0 1 0 7 50 -1 -1 0.000 0 1 1 0 10339.926 6368.284 9300 5025 8700 5925 9225 7650 1 1 1.00 60.00 120.00 5 1 0 1 0 7 50 -1 -1 0.000 0 0 0 1 7917.672 6484.914 9525 5025 10050 6075 9750 7650 1 1 1.00 60.00 120.00 5 1 0 1 0 7 50 -1 -1 0.000 0 0 1 1 7531.731 2331.731 4125 3975 3750 2400 3900 1275 1 1 1.00 60.00 120.00 1 1 1.00 60.00 120.00 5 1 0 1 0 7 50 -1 -1 0.000 0 0 1 1 10477.754 2669.174 8850 3975 8400 2475 8925 1275 1 1 1.00 60.00 120.00 1 1 1.00 60.00 120.00 5 1 0 1 0 7 50 -1 -1 0.000 0 1 1 1 6825.000 2625.000 9675 3975 9975 2475 9675 1275 1 1 1.00 60.00 120.00 1 1 1.00 60.00 120.00 5 1 0 1 0 7 50 -1 -1 0.000 0 0 1 1 2690.779 2326.844 5025 1275 5250 2400 4650 3975 1 1 1.00 60.00 120.00 1 1 1.00 60.00 120.00 5 1 0 1 0 7 50 -1 -1 0.000 0 0 1 1 6675.000 6531.250 5250 4500 6675 4050 8100 4500 1 1 1.00 60.00 120.00 1 1 1.00 60.00 120.00 5 1 0 1 0 7 50 -1 -1 0.000 0 0 1 1 10499.364 5185.805 9900 4500 10500 4275 11175 4575 1 1 1.00 60.00 120.00 1 1 1.00 60.00 120.00 5 1 0 1 0 7 50 -1 -1 0.000 0 0 1 1 6478.423 21308.838 3975 8175 5925 7950 8550 8100 1 1 1.00 60.00 120.00 1 1 1.00 60.00 120.00 5 1 0 1 0 7 50 -1 -1 0.000 0 1 1 1 6102.404 -1403.365 3975 8400 5850 8625 8550 8325 1 1 1.00 60.00 120.00 1 1 1.00 
60.00 120.00 5 1 0 1 0 7 50 -1 -1 0.000 0 1 1 1 9490.909 6835.048 11400 7650 11475 6225 9900 4800 1 1 1.00 60.00 120.00 1 1 1.00 60.00 120.00 1 2 0 1 0 4 100 -1 20 0.000 1 0.0000 8534 4503 3825 600 4709 4503 12359 4503 1 2 0 1 0 4 100 -1 20 0.000 1 0.0000 1961 4596 1575 2250 386 4596 3536 4596 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 225 7425 12825 7425 2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5 12675 1275 600 1275 600 375 12675 375 12675 1275 2 4 0 1 0 1 50 -1 20 0.000 0 0 7 0 0 5 3975 8625 825 8625 825 7650 3975 7650 3975 8625 2 4 0 1 0 1 50 -1 20 0.000 0 0 7 0 0 5 12225 8550 8550 8550 8550 7650 12225 7650 12225 8550 2 4 0 1 0 2 50 -1 20 0.000 0 0 7 0 0 5 2250 6975 375 6975 375 5925 2250 5925 2250 6975 2 4 0 1 0 2 50 -1 20 0.000 0 0 7 0 0 5 2175 5025 300 5025 300 3975 2175 3975 2175 5025 2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5 5250 5025 3450 5025 3450 3975 5250 3975 5250 5025 2 4 0 1 0 6 50 -1 20 0.000 0 0 7 0 0 5 9900 5025 8100 5025 8100 3975 9900 3975 9900 5025 2 4 0 1 0 2 50 -1 20 0.000 0 0 7 0 0 5 13050 5025 11175 5025 11175 3975 13050 3975 13050 5025 4 0 0 50 -1 0 12 0.0000 4 180 780 4050 900 Corosync\001 4 0 0 50 -1 0 12 0.0000 4 180 930 675 4650 gfs_control\001 4 0 0 50 -1 0 12 0.0000 4 180 1035 3825 4575 gfs_controld\001 4 0 0 50 -1 0 12 0.0000 4 180 1035 825 6525 mount.gfs[2]\001 4 0 0 50 -1 0 12 0.0000 4 165 570 1950 8250 GFS[2]\001 4 0 0 50 -1 0 12 0.0000 4 135 435 9900 8175 DLM\001 4 0 0 50 -1 0 12 0.0000 4 135 540 5100 7650 Kernel\001 4 0 0 50 -1 0 12 0.0000 4 180 810 5100 7350 Userspace\001 4 0 0 50 -1 0 12 0.0000 4 180 420 6375 6000 Sysfs\001 4 0 0 50 -1 0 12 0.0000 4 135 1005 2400 5325 Unix Socket\001 4 0 0 50 -1 0 12 0.0000 4 135 1005 2400 4050 Unix Socket\001 4 0 0 50 -1 0 12 0.0000 4 135 585 4500 5625 Uevent\001 4 0 0 50 -1 0 12 0.0000 4 180 420 8700 6375 Sysfs\001 4 0 0 50 -1 0 12 0.0000 4 135 585 10050 5775 Uevent\001 4 0 0 50 -1 0 12 4.7124 4 180 2235 3525 1575 CPG "gfs:mount:"\001 4 0 0 50 -1 0 12 4.7124 4 180 1560 4875 1575 CPG 
"gfs:controld"\001 4 0 0 50 -1 0 12 4.7124 4 135 1635 8700 1800 CPG "dlm:controld"\001 4 0 0 50 -1 0 12 4.7124 4 135 1920 10050 1725 GPG "dlm:ls:"\001 4 0 0 50 -1 0 12 0.0000 4 165 1110 8550 4575 dlm_controld\001 4 0 0 50 -1 0 12 0.0000 4 165 1005 11625 4575 dlm_control\001 4 0 0 50 -1 0 12 0.0000 4 135 1005 10050 4200 Unix Socket\001 4 0 0 50 -1 0 12 0.0000 4 180 2160 5100 8775 Posix lock requests/replies\001 4 0 0 50 -1 0 12 0.0000 4 180 2370 5850 7875 DLM lock requests/callbacks\001 4 0 0 50 -1 0 12 0.0000 4 165 1050 11550 6150 (Posix locks)\001 4 0 0 50 -1 0 12 0.0000 4 135 1020 11475 5925 Misc Device\001 4 0 0 50 -1 0 12 0.0000 4 135 1005 6225 4275 Unix Socket\001 4 0 4 50 -1 0 12 0.0000 4 135 1125 6975 3825 libdlmcontrol\001 4 0 4 50 -1 0 12 0.0000 4 180 1050 1350 2250 libgfscontrol\001 4 0 0 50 -1 0 12 0.0000 4 180 420 3000 6750 Sysfs\001 gfs2-utils/doc/gfs2.txt0000664000175000017500000000343612110647577013716 0ustar andyandyGlobal File System ------------------ http://sources.redhat.com/cluster/ GFS2 is a cluster file system. It allows a cluster of computers to simultaneously use a block device that is shared between them (with FC, iSCSI, NBD, etc). GFS2 reads and writes to the block device like a local file system, but also uses a lock module to allow the computers coordinate their I/O so file system consistency is maintained. One of the nifty features of GFS2 is perfect consistency -- changes made to the file system on one machine show up immediately on all other machines in the cluster. GFS2 uses interchangable inter-node locking mechanisms. The currently supported methods are: lock_nolock -- does no real locking and allows gfs to be used as a local file system lock_dlm -- uses a distributed lock manager (dlm) for inter-node locking The dlm is found at linux/fs/dlm/ Lock_dlm depends on user space cluster management systems found at the URL above. 
To use GFS2 as a local file system, no external clustering systems are needed, simply: $ gfs2_mkfs -p lock_nolock -j 1 /dev/block_device $ mount -t gfs2 /dev/block_device /dir GFS2 is not on-disk compatible with previous versions of GFS, but it does use a very similar on-disk format, so that upgrading a filesystem can be done in place and makes relatively few changes. Upgrading a filesystem to GFS2 is not currently reversible. The following man pages can be found at the URL above: mkfs.gfs2 to make a filesystem fsck.gfs2 to repair a filesystem gfs2_grow to expand a filesystem online gfs2_jadd to add journals to a filesystem online gfs2_tool to manipulate, examine and tune a filesystem gfs2_quota to examine and change quota values in a filesystem gfs2_convert to convert a gfs filesystem to gfs2 mount.gfs2 to find mount options gfs2-utils/doc/journaling.txt0000664000175000017500000001727512110647577015233 0ustar andyandyo Journaling & Replay The fundamental problem with a journaled cluster filesystem is handling journal replay with multiple journals. A single block of metadata can be modified sequentially by many different nodes in the cluster. As the block is modified by each node, it gets logged in the journal for each node. If care is not taken, it's possible to get into a situation where a journal replay can actually corrupt a filesystem. The error scenario is: 1) Node A modifies a metadata block by putting an updated copy into its incore log. 2) Node B wants to read and modify the block so it requests the lock and a blocking callback is sent to Node A. 3) Node A flushes its incore log to disk, and then syncs out the metadata block to its inplace location. 4) Node A then releases the lock. 5) Node B reads in the block and puts a modified copy into its ondisk log and then the inplace block location. 6) Node A crashes. At this point, Node A's journal needs to be replayed.
Since there is a newer version of the block inplace, if that block is replayed, the filesystem will be corrupted. There are a few different ways of avoiding this problem. 1) Generation Numbers (GFS1) Each metadata block has a header in it that contains a 64-bit generation number. As each block is logged into a journal, the generation number is incremented. This provides a strict ordering of the different versions of the block as they are logged in the FS' different journals. When journal replay happens, each block in the journal is not replayed if the generation number in the journal is less than the generation number in place. This ensures that a newer version of a block is never replaced with an older version. So, this solution basically allows multiple copies of the same block in different journals, but it allows you to always know which is the correct one. Pros: A) This method allows the fastest callbacks. To release a lock, the incore log for the lock must be flushed and then the inplace data and metadata must be synced. That's it. The sync operations involved are: start the log body and wait for it to become stable on the disk, synchronously write the commit block, start the inplace metadata and wait for it to become stable on the disk. Cons: A) Maintaining the generation numbers is expensive. All newly allocated metadata blocks must be read off the disk in order to figure out what the previous value of the generation number was. When deallocating metadata, extra work and care must be taken to make sure dirty data isn't thrown away in such a way that the generation numbers stop doing their thing. B) You can't continue to modify the filesystem during journal replay. Basically, replay of a block is a read-modify-write operation: the block is read from disk, the generation number is compared, and (maybe) the new version is written out.
Replay requires that the R-M-W operation is atomic with respect to other R-M-W operations that might be happening (say by a normal I/O process). Since journal replay doesn't (and can't) play by the normal metadata locking rules, you can't count on them to protect replay. Hence GFS1 quiesces all writes on a filesystem before starting replay. This provides the mutual exclusion required, but it's slow and unnecessarily interrupts service on the whole cluster. 2) Total Metadata Sync (OCFS2) This method is really simple in that it uses exactly the same infrastructure that a local journaled filesystem uses. Every time a node receives a callback, it stops all metadata modification, syncs out the whole incore journal, syncs out any dirty data, marks the journal as being clean (unmounted), and then releases the lock. Because the journal is marked as clean and recovery won't look at any of the journaled blocks in it, a valid copy of any particular block only exists in one journal at a time and that journal is always the journal that modified it last. Pros: A) Very simple to implement. B) You can reuse journaling code from other places (such as JBD). C) No quiesce necessary for replay. D) No need for generation numbers sprinkled throughout the metadata. Cons: A) This method has the slowest possible callbacks. The sync operations are: stop all metadata operations, start and wait for the log body, write the log commit block, start and wait for all the FS' dirty metadata, write an unmount block. Writing the metadata for the whole filesystem can be particularly expensive because it can be scattered all over the disk and there can be a whole journal's worth of it. 3) Revocation of a lock's buffers (GFS2) This method prevents a block from appearing in more than one journal by canceling out the metadata blocks in the journal that belong to the lock being released. Journaling works very similarly to a local filesystem or to #2 above.
The biggest difference is that you have to keep track of buffers in the active region of the ondisk journal, even after the inplace blocks have been written back. This is done in GFS2 by adding a second part to the Active Items List. The first part (in GFS2 called AIL1) contains a list of all the blocks which have been logged to the journal, but not written back to their inplace location. Once an item in AIL1 has been written back to its inplace location, it is moved to AIL2. Once the tail of the log moves past the block's transaction in the log, it can be removed from AIL2.

When a callback occurs, the log is flushed to the disk and the metadata for the lock is synced to disk. At this point, any metadata blocks for the lock that are in the current active region of the log will be in the AIL2 list. We then build a transaction that contains revoke tags for each buffer in the AIL2 list that belongs to that lock.

Pros:

A) No quiesce necessary for replay.
B) No need for generation numbers sprinkled throughout the metadata.
C) The sync operations are: stop all metadata operations, start and wait for the log body, write the log commit block, start and wait for all the FS' dirty metadata, start and wait for the log body of a transaction that revokes any of the lock's metadata buffers in the journal's active region, and write the commit block for that transaction.

Cons:

A) Recovery takes two passes, one to find all the revoke tags in the log and one to replay the metadata blocks using the revoke tags as a filter. This is necessary for a local filesystem and the total sync method, too. It's just that there will probably be more tags.

Comparing #2 and #3, both do extra I/O during a lock callback to make sure that any metadata blocks in the log for that lock will be removed. I believe #2 will be slower because syncing out all the dirty metadata for the entire filesystem requires lots of little, scattered I/O across the whole disk. The extra I/O done by #3 is a log write to the disk.
So, not only should it be less I/O, but it should also be better suited to get good performance out of the disk subsystem. KWP 07/06/05 Further notes (Steven Whitehouse) ------------- Number 3 is slow due to having to do two write/wait transactions in the log each time we release a glock. So far as I can see there is no way around that, but it should be possible, if we so wish to change to using #2 at some future date and still remain backward compatible. So that option is open to us, but I'm not sure that we want to take it yet. There may well be other ways to speed things up in this area. More work remains to be done. gfs2-utils/gfs2/Makefile.am0000664000175000017500000000016712110647577014442 0ustar andyandyMAINTAINERCLEANFILES = Makefile.in SUBDIRS = libgfs2 convert edit fsck mkfs man \ tune include scripts #init.d gfs2-utils/gfs2/convert/Makefile.am0000664000175000017500000000041112110647577016112 0ustar andyandyMAINTAINERCLEANFILES = Makefile.in sbin_PROGRAMS = gfs2_convert gfs2_convert_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -DHELPER_PROGRAM \ -I$(top_srcdir)/gfs2/include \ -I$(top_srcdir)/gfs2/libgfs2 gfs2_convert_LDADD = $(top_builddir)/gfs2/libgfs2/libgfs2.la gfs2-utils/gfs2/convert/gfs2_convert.c0000664000175000017500000024004612154127655016633 0ustar andyandy/***************************************************************************** ****************************************************************************** ** ** gfs2_convert - convert a gfs1 filesystem into a gfs2 filesystem. 
** ****************************************************************************** *****************************************************************************/ #include "clusterautoconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define _(String) gettext(String) #include #include #include "osi_list.h" #include "copyright.cf" #include "libgfs2.h" /* The following declares are needed because gfs2 can't have */ /* dependencies on gfs1: */ #define RGRP_STUFFED_BLKS(sb) (((sb)->sb_bsize - sizeof(struct gfs2_rgrp)) * GFS2_NBBY) #define RGRP_BITMAP_BLKS(sb) (((sb)->sb_bsize - sizeof(struct gfs2_meta_header)) * GFS2_NBBY) /* Define some gfs1 constants from gfs1's gfs_ondisk.h */ #define GFS_METATYPE_NONE (0) #define GFS_METATYPE_SB (1) /* Super-Block */ #define GFS_METATYPE_RG (2) /* Resource Group Header */ #define GFS_METATYPE_RB (3) /* Resource Group Block Alloc BitBlock */ #define GFS_METATYPE_DI (4) /* "Disk" inode (dinode) */ #define GFS_METATYPE_IN (5) /* Indirect dinode block list */ #define GFS_METATYPE_LF (6) /* Leaf dinode block list */ #define GFS_METATYPE_JD (7) /* Journal Data */ #define GFS_METATYPE_LH (8) /* Log Header (gfs_log_header) */ #define GFS_METATYPE_LD (9) /* Log Descriptor (gfs_log_descriptor) */ #define GFS_METATYPE_EA (10) /* Extended Attribute */ #define GFS_METATYPE_ED (11) /* Extended Attribute data */ /* GFS1 Dinode types */ #define GFS_FILE_NON (0) #define GFS_FILE_REG (1) /* regular file */ #define GFS_FILE_DIR (2) /* directory */ #define GFS_FILE_LNK (5) /* link */ #define GFS_FILE_BLK (7) /* block device node */ #define GFS_FILE_CHR (8) /* character device node */ #define GFS_FILE_FIFO (101) /* fifo/pipe */ #define GFS_FILE_SOCK (102) /* socket */ #define GFS_FORMAT_SB (100) /* Super-Block */ #define GFS_FORMAT_FS (1309) /* Filesystem (all-encompassing) */ #define GFS_FORMAT_MULTI (1401) /* Multi-Host */ #define 
DIV_RU(x, y) (((x) + (y) - 1) / (y)) struct gfs1_rgrp { struct gfs2_meta_header rg_header; /* hasn't changed from gfs1 to 2 */ uint32_t rg_flags; uint32_t rg_free; /* Number (qty) of free data blocks */ /* Dinodes are USEDMETA, but are handled separately from other METAs */ uint32_t rg_useddi; /* Number (qty) of dinodes (used or free) */ uint32_t rg_freedi; /* Number (qty) of unused (free) dinodes */ struct gfs2_inum rg_freedi_list; /* hasn't changed from gfs1 to 2 */ /* These META statistics do not include dinodes (used or free) */ uint32_t rg_usedmeta; /* Number (qty) of used metadata blocks */ uint32_t rg_freemeta; /* Number (qty) of unused metadata blocks */ char rg_reserved[64]; }; struct gfs1_jindex { uint64_t ji_addr; /* starting block of the journal */ uint32_t ji_nsegment; /* number (quantity) of segments in journal */ uint32_t ji_pad; char ji_reserved[64]; }; struct gfs1_sb { /* Order is important; need to be able to read old superblocks in order to support on-disk version upgrades */ struct gfs2_meta_header sb_header; uint32_t sb_fs_format; /* GFS_FORMAT_FS (on-disk version) */ uint32_t sb_multihost_format; /* GFS_FORMAT_MULTI */ uint32_t sb_flags; /* ?? */ uint32_t sb_bsize; /* fundamental FS block size in bytes */ uint32_t sb_bsize_shift; /* log2(sb_bsize) */ uint32_t sb_seg_size; /* Journal segment size in FS blocks */ /* These special inodes do not appear in any on-disk directory. 
*/ struct gfs2_inum sb_jindex_di; /* journal index inode */ struct gfs2_inum sb_rindex_di; /* resource group index inode */ struct gfs2_inum sb_root_di; /* root directory inode */ /* Default inter-node locking protocol (lock module) and namespace */ char sb_lockproto[GFS2_LOCKNAME_LEN]; /* lock protocol name */ char sb_locktable[GFS2_LOCKNAME_LEN]; /* unique name for this FS */ /* More special inodes */ struct gfs2_inum sb_quota_di; /* quota inode */ struct gfs2_inum sb_license_di; /* license inode */ char sb_reserved[96]; }; struct inode_dir_block { osi_list_t list; uint64_t di_addr; uint64_t di_paddr; /* Parent dir inode addr */ }; struct inode_block { osi_list_t list; uint64_t di_addr; }; struct blocklist { osi_list_t list; uint64_t block; struct metapath mp; int height; char *ptrbuf; }; struct gfs2_options { char *device; unsigned int yes:1; unsigned int no:1; unsigned int query:1; }; struct gfs1_sb raw_gfs1_ondisk_sb; struct gfs2_sbd sb2; char device[256]; struct inode_block dirs_to_fix; /* linked list of directories to fix */ struct inode_dir_block cdpns_to_fix; /* linked list of cdpn symlinks */ int seconds; struct timeval tv; uint64_t dirs_fixed; uint64_t cdpns_fixed; uint64_t dirents_fixed; struct gfs1_jindex *sd_jindex = NULL; /* gfs1 journal index in memory */ int gfs2_inptrs; uint64_t gfs2_heightsize[GFS2_MAX_META_HEIGHT]; uint64_t gfs2_jheightsize[GFS2_MAX_META_HEIGHT]; uint32_t gfs2_max_height; uint32_t gfs2_max_jheight; uint64_t jindex_addr = 0, rindex_addr = 0; /* ------------------------------------------------------------------------- */ /* This function is for libgfs's sake. */ /* ------------------------------------------------------------------------- */ void print_it(const char *label, const char *fmt, const char *fmt2, ...) 
{ va_list args; va_start(args, fmt2); printf("%s: ", label); vprintf(fmt, args); va_end(args); } /* ------------------------------------------------------------------------- */ /* convert_bitmaps - Convert gfs1 bitmaps to gfs2 bitmaps. */ /* Fixes all unallocated metadata bitmap states (which are */ /* valid in gfs1 but invalid in gfs2). */ /* ------------------------------------------------------------------------- */ static void convert_bitmaps(struct gfs2_sbd *sdp, struct rgrp_tree *rg) { uint32_t blk; int x, y; struct gfs2_rindex *ri; unsigned char state; ri = &rg->ri; for (blk = 0; blk < ri->ri_length; blk++) { x = (blk) ? sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_rgrp); for (; x < sdp->bsize; x++) for (y = 0; y < GFS2_NBBY; y++) { state = (rg->bh[blk]->b_data[x] >> (GFS2_BIT_SIZE * y)) & 0x03; if (state == 0x02) {/* unallocated metadata state invalid */ rg->bh[blk]->b_data[x] &= ~(0x02 << (GFS2_BIT_SIZE * y)); bmodified(rg->bh[blk]); } } } }/* convert_bitmaps */ /* ------------------------------------------------------------------------- */ /* convert_rgs - Convert gfs1 resource groups to gfs2. */ /* Returns: 0 on success, -1 on failure */ /* ------------------------------------------------------------------------- */ static int convert_rgs(struct gfs2_sbd *sbp) { struct rgrp_tree *rgd; struct osi_node *n, *next = NULL; struct gfs1_rgrp *rgd1; int rgs = 0; /* --------------------------------- */ /* Now convert its rgs into gfs2 rgs */ /* --------------------------------- */ for (n = osi_first(&sbp->rgtree); n; n = next) { next = osi_next(n); rgd = (struct rgrp_tree *)n; rgd1 = (struct gfs1_rgrp *)&rgd->rg; /* recast as gfs1 structure */ /* rg_freemeta is a gfs1 structure, so libgfs2 doesn't know to */ /* convert from be to cpu. We must do it now. 
*/ rgd->rg.rg_free = rgd1->rg_free + be32_to_cpu(rgd1->rg_freemeta); /* Zero it out so we don't add it again in case something breaks */ /* later on in the process and we have to re-run convert */ rgd1->rg_freemeta = 0; sbp->blks_total += rgd->ri.ri_data; sbp->blks_alloced += (rgd->ri.ri_data - rgd->rg.rg_free); sbp->dinodes_alloced += rgd1->rg_useddi; convert_bitmaps(sbp, rgd); /* Write the updated rgrp to the gfs2 buffer */ gfs2_rgrp_out_bh(&rgd->rg, rgd->bh[0]); rgs++; if (rgs % 100 == 0) { printf("."); fflush(stdout); } } return 0; }/* superblock_cvt */ /* ------------------------------------------------------------------------- */ /* calc_gfs2_tree_height - calculate new dinode height as if this is gfs2 */ /* */ /* This is similar to calc_tree_height in libgfs2 but at the point this */ /* function is called, I have the wrong (gfs1 not gfs2) constants in place. */ /* ------------------------------------------------------------------------- */ static unsigned int calc_gfs2_tree_height(struct gfs2_inode *ip, uint64_t size) { uint64_t *arr; unsigned int max, height; if (ip->i_di.di_size > size) size = ip->i_di.di_size; if (S_ISDIR(ip->i_di.di_mode)) { arr = gfs2_jheightsize; max = gfs2_max_jheight; } else { arr = gfs2_heightsize; max = gfs2_max_height; } for (height = 0; height < max; height++) if (arr[height] >= size) break; /* If calc_gfs2_tree_height was called, the dinode is not stuffed or we would have returned before this point. After the call, a call is made to fix_metatree, which unstuffs the dinode. Therefore, the smallest height that can result after this call is 1. */ if (!height) height = 1; return height; } /* ------------------------------------------------------------------------- */ /* mp_gfs1_to_gfs2 - convert a gfs1 metapath to a gfs2 metapath. 
*/
/* ------------------------------------------------------------------------- */
/*
 * The gfs1 path is first flattened into a single logical pointer index,
 * weighting each level by sbp->sd_inptrs (the gfs1 value at this point),
 * and that index is then split again using gfs2_inptrs per level.
 *
 * sbp     - superblock data; only sd_inptrs is read
 * gfs1_h  - number of levels in gfs1mp
 * gfs2_h  - number of levels to produce in gfs2mp
 * gfs1mp  - input metapath
 * gfs2mp  - output metapath; fully overwritten
 */
static void mp_gfs1_to_gfs2(struct gfs2_sbd *sbp, int gfs1_h, int gfs2_h,
			    struct metapath *gfs1mp, struct metapath *gfs2mp)
{
	uint64_t weight1[GFS2_MAX_META_HEIGHT] = {0};
	uint64_t weight2[GFS2_MAX_META_HEIGHT] = {0};
	uint64_t remaining = 0;
	int lvl;

	/* Per-level multipliers for the gfs1 tree: the deepest level counts
	   single pointers, each level above is sd_inptrs times larger. */
	weight1[gfs1_h - 1] = 1ull;
	for (lvl = gfs1_h - 2; lvl >= 0; lvl--)
		weight1[lvl] = weight1[lvl + 1] * sbp->sd_inptrs;

	/* Same for the gfs2 tree, using the gfs2 pointers-per-block. */
	weight2[gfs2_h - 1] = 1ull;
	for (lvl = gfs2_h - 2; lvl >= 0; lvl--)
		weight2[lvl] = weight2[lvl + 1] * gfs2_inptrs;

	/* Convert from gfs1 to a logical block */
	for (lvl = 0; lvl < gfs1_h; lvl++)
		remaining += (gfs1mp->mp_list[lvl] * weight1[lvl]);

	/* Convert from a logical block back to gfs2 */
	memset(gfs2mp, 0, sizeof(*gfs2mp));
	for (lvl = 0; lvl < gfs2_h; lvl++) {
		/* Can't use do_div here because the factors are too large. */
		gfs2mp->mp_list[lvl] = remaining / weight2[lvl];
		remaining %= weight2[lvl];
	}
}

/* ------------------------------------------------------------------------- */
/* fix_metatree - Fix up the metatree to match the gfs2 metapath info        */
/*                Similar to gfs2_writei in libgfs2 but we're only           */
/*                interested in rearranging the metadata while leaving the   */
/*                actual data blocks intact.
*/
/* ------------------------------------------------------------------------- */
/*
 * Copies `size` bytes of block pointers, starting at first_nonzero_ptr, into
 * the inode's metadata tree at the position described by blk->mp and
 * blk->height, walking/creating the intermediate blocks with lookup_block().
 *
 * sbp                - superblock data (bsize is read; passed to bread())
 * ip                 - inode being rebuilt; unstuffed first if its height is 0
 * blk                - supplies the target metapath and height; the metapath
 *                      is advanced in place when a destination block fills up
 * first_nonzero_ptr  - source pointers to copy
 * size               - number of bytes to copy
 *
 * The copy is done in pieces: each pass of the while loop fills at most the
 * remainder of one destination block.
 */
static void fix_metatree(struct gfs2_sbd *sbp, struct gfs2_inode *ip,
		  struct blocklist *blk, uint64_t *first_nonzero_ptr,
		  unsigned int size)
{
	uint64_t block;
	struct gfs2_buffer_head *bh;
	unsigned int amount, ptramt;
	int hdrsize, h, copied = 0, new;
	struct gfs2_meta_header mh;
	char *srcptr = (char *)first_nonzero_ptr;

	/* Header stamped onto every block visited while walking the tree */
	mh.mh_magic = GFS2_MAGIC;
	mh.mh_type = GFS2_METATYPE_IN;
	mh.mh_format = GFS2_FORMAT_IN;
	if (!ip->i_di.di_height)
		unstuff_dinode(ip);

	/* Byte offset of the first pointer slot within the destination block;
	   only applies to the first pass (reset to 0 at the loop bottom). */
	ptramt = blk->mp.mp_list[blk->height] * sizeof(uint64_t);
	amount = size;

	while (copied < size) {
		bh = ip->i_bh;
		/* First, build up the metatree */
		for (h = 0; h < blk->height; h++) {
			new = 0;
			/* 5th argument is 1: presumably "create if missing" —
			   TODO confirm against libgfs2's lookup_block() */
			lookup_block(ip, bh, h, &blk->mp, 1, &new, &block);
			/* The dinode buffer belongs to ip; only release
			   buffers obtained from bread() below. */
			if (bh != ip->i_bh)
				brelse(bh);
			/* NOTE(review): if this break is taken after the
			   brelse() above, bh is still used by the memcpy
			   below — confirm block can never be 0 here. */
			if (!block)
				break;

			bh = bread(sbp, block);
			if (new)
				memset(bh->b_data, 0, sbp->bsize);
			gfs2_meta_header_out_bh(&mh, bh);
		}

		/* Pointers start after the dinode at height 0, otherwise
		   after a plain meta header. */
		hdrsize = blk->height ? sizeof(struct gfs2_meta_header) :
			sizeof(struct gfs2_dinode);

		/* Clamp this piece to what still fits in the block */
		if (amount > sbp->bsize - hdrsize - ptramt)
			amount = sbp->bsize - hdrsize - ptramt;

		memcpy(bh->b_data + hdrsize + ptramt, (char *)srcptr, amount);
		srcptr += amount;
		bmodified(bh);
		if (bh != ip->i_bh)
			brelse(bh);

		copied += amount;

		if (hdrsize + ptramt + amount >= sbp->bsize) {
			/* advance to the next metablock */
			blk->mp.mp_list[blk->height] +=
				(amount / sizeof(uint64_t));
			/* Carry overflowing indexes up towards the root */
			for (h = blk->height; h > 0; h--) {
				if (blk->mp.mp_list[h] >= gfs2_inptrs) {
					blk->mp.mp_list[h] = 0;
					blk->mp.mp_list[h - 1]++;
					continue;
				}
				break;
			}
		}
		amount = size - copied;
		ptramt = 0;
	}
}

/* ------------------------------------------------------------------------- */
/* adjust_indirect_blocks - convert all gfs_indirect blocks to gfs2.         */
/*                                                                           */
/* This function converts all gfs_indirect blocks to GFS2.  The difference   */
/* is that gfs1 indirect block has a 64-byte chunk of reserved space that    */
/* gfs2 does not.
Since GFS block locations (relative to the start of the   */
/* file) have their locations defined by the offset from the end of the     */
/* structure, all block pointers must be shifted.                            */
/*                                                                           */
/* Stuffed inodes don't need to be shifted at all since there are no         */
/* indirect blocks.  Inodes with height 1 don't need to be shifted either,   */
/* because the dinode size is the same between gfs and gfs2 (232 bytes), and */
/* therefore you can fit the same number of block pointers after the dinode  */
/* structure.  For the normal 4K block size, that's 483 pointers.  For 1K    */
/* blocks, it's 99 pointers.                                                 */
/*                                                                           */
/* At height 2 things get complex.  GFS1 reserves an area of 64 (0x40) bytes */
/* at the start of the indirect block, so for 4K blocks, you can fit 501     */
/* pointers.  GFS2 doesn't reserve that space, so you can fit 509 pointers.  */
/* For 1K blocks, it's 117 pointers in GFS1 and 125 in GFS2.                 */
/*                                                                           */
/* That means, for example, that if you have 4K blocks, a 946MB file will    */
/* require a height of 3 for GFS, but only a height of 2 for GFS2.           */
/* There isn't a good way to shift the pointers around from one height to    */
/* another, so the only way to do it is to rebuild all those indirect blocks */
/* from empty ones.                                                          */
/*                                                                           */
/* For example, with a 1K block size, if you do:                             */
/*                                                                           */
/* dd if=/mnt/gfs/big of=/tmp/tocompare skip=496572346368 bs=1024 count=1    */
/*                                                                           */
/* the resulting metadata paths will look vastly different for the data:     */
/*                                                                           */
/* height    0     1     2     3     4     5                                 */
/* GFS1:     0x16  0x4b  0x70  0x11  0x5e  0x48                              */
/* GFS2:     0x10  0x21  0x78  0x05  0x14  0x76                              */
/*                                                                           */
/* To complicate matters, we can't really require free space.  A user might  */
/* be trying to migrate a "full" gfs1 file system to GFS2.  After we         */
/* convert the journals to GFS2, we might have more free space, so we can    */
/* allocate blocks at that time.                                             */
/*                                                                           */
/* Assumes: GFS1 values are in place for diptrs and inptrs.
*/ /* */ /* Returns: 0 on success, -1 on failure */ /* */ /* Adapted from gfs2_fsck metawalk.c's build_and_check_metalist */ /* ------------------------------------------------------------------------- */ static void jdata_mp_gfs1_to_gfs2(struct gfs2_sbd *sbp, int gfs1_h, int gfs2_h, struct metapath *gfs1mp, struct metapath *gfs2mp, unsigned int *len, uint64_t dinode_size) { uint64_t offset; int h; uint64_t gfs1factor[GFS2_MAX_META_HEIGHT]; uint64_t gfs2factor[GFS2_MAX_META_HEIGHT]; /* figure out multiplication factors for each height - gfs1 */ memset(&gfs1factor, 0, sizeof(gfs1factor)); gfs1factor[gfs1_h - 1] = sbp->bsize - sizeof(struct gfs2_meta_header); for (h = gfs1_h - 1; h > 0; h--) gfs1factor[h - 1] = gfs1factor[h] * sbp->sd_inptrs; /* figure out multiplication factors for each height - gfs2 */ memset(&gfs2factor, 0, sizeof(gfs2factor)); gfs2factor[gfs2_h] = 1ull; gfs2factor[gfs2_h - 1] = sbp->bsize; for (h = gfs2_h - 1; h > 0; h--) gfs2factor[h - 1] = gfs2factor[h] * gfs2_inptrs; /* Convert from gfs1 to an offset */ offset = 0; for (h = 0; h < gfs1_h; h++) offset += (gfs1mp->mp_list[h] * gfs1factor[h]); if (dinode_size - offset < *len) *len = dinode_size - offset; /* Convert from an offset back to gfs2 */ memset(gfs2mp, 0, sizeof(*gfs2mp)); for (h = 0; h <= gfs2_h; h++) { /* Can't use do_div here because the factors are too large. 
*/ gfs2mp->mp_list[h] = offset / gfs2factor[h]; offset %= gfs2factor[h]; } } static void fix_jdatatree(struct gfs2_sbd *sbp, struct gfs2_inode *ip, struct blocklist *blk, char *srcptr, unsigned int size) { uint64_t block; struct gfs2_buffer_head *bh; unsigned int amount, ptramt; int h, copied = 0, new = 0; struct gfs2_meta_header mh; mh.mh_magic = GFS2_MAGIC; mh.mh_type = GFS2_METATYPE_IN; mh.mh_format = GFS2_FORMAT_IN; if (!ip->i_di.di_height) unstuff_dinode(ip); ptramt = blk->mp.mp_list[blk->height]; amount = size; while (copied < size) { bh = ip->i_bh; /* First, build up the metatree */ for (h = 0; h < blk->height; h++) { new = 0; lookup_block(ip, bh, h, &blk->mp, 1, &new, &block); if (bh != ip->i_bh) brelse(bh); if (!block) break; bh = bread(sbp, block); if (new) memset(bh->b_data, 0, sbp->bsize); if (h < (blk->height - 1)) gfs2_meta_header_out_bh(&mh, bh); } if (amount > sbp->bsize - ptramt) amount = sbp->bsize - ptramt; memcpy(bh->b_data + ptramt, (char *)srcptr, amount); srcptr += amount; bmodified(bh); if (bh != ip->i_bh) brelse(bh); copied += amount; if (ptramt + amount >= sbp->bsize) { /* advance to the next metablock */ blk->mp.mp_list[blk->height] += amount; for (h = blk->height; h > 0; h--) { if (blk->mp.mp_list[h] >= gfs2_inptrs) { blk->mp.mp_list[h] = 0; blk->mp.mp_list[h - 1]++; continue; } break; } } amount = size - copied; ptramt = 0; } } static int get_inode_metablocks(struct gfs2_sbd *sbp, struct gfs2_inode *ip, struct blocklist *blocks) { struct blocklist *blk, *newblk; struct gfs2_buffer_head *bh, *dibh = ip->i_bh; osi_list_t *tmp; uint64_t *ptr1, block; int h, ptrnum; int bufsize = sbp->bsize - sizeof(struct gfs_indirect); /* Add dinode block to the list */ blk = malloc(sizeof(struct blocklist)); if (!blk) { log_crit(_("Error: Can't allocate memory for indirect block fix\n")); return -1; } memset(blk, 0, sizeof(*blk)); blk->block = dibh->b_blocknr; blk->ptrbuf = malloc(bufsize); if (!blk->ptrbuf) { log_crit(_("Error: Can't allocate memory" " 
for file conversion.\n")); free(blk); return -1; } memset(blk->ptrbuf, 0, bufsize); /* Fill in the pointers from the dinode buffer */ memcpy(blk->ptrbuf, dibh->b_data + sizeof(struct gfs_dinode), sbp->bsize - sizeof(struct gfs_dinode)); /* Zero out the pointers so we can fill them in later. */ memset(dibh->b_data + sizeof(struct gfs_dinode), 0, sbp->bsize - sizeof(struct gfs_dinode)); osi_list_add_prev(&blk->list, &blocks->list); /* Now run the metadata chain and build lists of all metadata blocks */ osi_list_foreach(tmp, &blocks->list) { blk = osi_list_entry(tmp, struct blocklist, list); if (blk->height >= ip->i_di.di_height - 1) continue; for (ptr1 = (uint64_t *)blk->ptrbuf, ptrnum = 0; ptrnum < sbp->sd_inptrs; ptr1++, ptrnum++) { if (!*ptr1) continue; block = be64_to_cpu(*ptr1); newblk = malloc(sizeof(struct blocklist)); if (!newblk) { log_crit(_("Error: Can't allocate memory for indirect block fix.\n")); return -1; } memset(newblk, 0, sizeof(*newblk)); newblk->ptrbuf = malloc(bufsize); if (!newblk->ptrbuf) { /* FIXME: This message should be different, to not conflit with the above file conversion */ log_crit(_("Error: Can't allocate memory for file conversion.\n")); free(newblk); return -1; } memset(newblk->ptrbuf, 0, bufsize); newblk->block = block; newblk->height = blk->height + 1; /* Build the metapointer list from our predecessors */ for (h = 0; h < blk->height; h++) newblk->mp.mp_list[h] = blk->mp.mp_list[h]; newblk->mp.mp_list[h] = ptrnum; /* Queue it to be processed later on in the loop. */ osi_list_add_prev(&newblk->list, &blocks->list); /* read the new metadata block's pointers */ bh = bread(sbp, block); memcpy(newblk->ptrbuf, bh->b_data + sizeof(struct gfs_indirect), bufsize); /* Zero the buffer so we can fill it in later */ memset(bh->b_data + sizeof(struct gfs_indirect), 0, bufsize); bmodified(bh); brelse(bh); /* Free the block so we can reuse it. This allows us to convert a "full" file system. 
*/ ip->i_di.di_blocks--; gfs2_free_block(sbp, block); } } return 0; } static int fix_ind_reg_or_dir(struct gfs2_sbd *sbp, struct gfs2_inode *ip, uint32_t di_height, uint32_t gfs2_hgt, struct blocklist *blk, struct blocklist *blocks) { unsigned int len, bufsize; uint64_t *ptr1, *ptr2; int ptrnum; struct metapath gfs2mp; bufsize = sbp->bsize - sizeof(struct gfs_indirect); len = bufsize; /* Skip zero pointers at the start of the buffer. This may seem pointless, but the gfs1 blocks won't align with the gfs2 blocks. That means that a single block write of gfs1's pointers is likely to span two blocks on gfs2. That's a problem if the file system is full. So I'm trying to truncate the data at the start and end of the buffers (i.e. write only what we need to). */ for (ptr1 = (uint64_t *)blk->ptrbuf, ptrnum = 0; ptrnum < sbp->sd_inptrs; ptr1++, ptrnum++) { if (*ptr1 != 0x00) break; len -= sizeof(uint64_t); } /* Skip zero bytes at the end of the buffer */ ptr2 = (uint64_t *)(blk->ptrbuf + bufsize) - 1; while (len > 0 && *ptr2 == 0) { ptr2--; len -= sizeof(uint64_t); } blk->mp.mp_list[di_height - 1] = ptrnum; mp_gfs1_to_gfs2(sbp, di_height, gfs2_hgt, &blk->mp, &gfs2mp); memcpy(&blk->mp, &gfs2mp, sizeof(struct metapath)); blk->height -= di_height - gfs2_hgt; if (len) fix_metatree(sbp, ip, blk, ptr1, len); return 0; } static int fix_ind_jdata(struct gfs2_sbd *sbp, struct gfs2_inode *ip, uint32_t di_height, uint32_t gfs2_hgt, uint64_t dinode_size, struct blocklist *blk, struct blocklist *blocks) { /*FIXME: Messages here should be different, to not conflit with messages in get_inode_metablocks */ struct blocklist *newblk; unsigned int len, bufsize; uint64_t *ptr1, block; int ptrnum, h; struct metapath gfs2mp; struct gfs2_buffer_head *bh; bufsize = sbp->bsize - sizeof(struct gfs2_meta_header); /* * For each metadata block that holds jdata block pointers, * get the blk pointers and copy them block by block */ for (ptr1 = (uint64_t *) blk->ptrbuf, ptrnum = 0; ptrnum < sbp->sd_inptrs; 
ptr1++, ptrnum++) { if (!*ptr1) continue; block = be64_to_cpu(*ptr1); newblk = malloc(sizeof(struct blocklist)); if (!newblk) { log_crit(_("Error: Can't allocate memory for indirect block fix.\n")); return -1; } memset(newblk, 0, sizeof(*newblk)); newblk->ptrbuf = malloc(bufsize); if (!newblk->ptrbuf) { log_crit(_("Error: Can't allocate memory for file conversion.\n")); free(newblk); return -1; } memset(newblk->ptrbuf, 0, bufsize); newblk->block = block; newblk->height = blk->height + 1; /* Build the metapointer list from our predecessors */ for (h=0; h < blk->height; h++) newblk->mp.mp_list[h] = blk->mp.mp_list[h]; newblk->mp.mp_list[h] = ptrnum; bh = bread(sbp, block); /* This is a data block. i.e newblk->height == ip->i_di.di_height */ /* read in the jdata block */ memcpy(newblk->ptrbuf, bh->b_data + sizeof(struct gfs2_meta_header), bufsize); memset(bh->b_data + sizeof(struct gfs2_meta_header), 0, bufsize); bmodified(bh); brelse(bh); /* Free the block so we can reuse it. This allows us to convert a "full" file system */ ip->i_di.di_blocks--; gfs2_free_block(sbp, block); len = bufsize; jdata_mp_gfs1_to_gfs2(sbp, di_height, gfs2_hgt, &newblk->mp, &gfs2mp, &len, dinode_size); memcpy(&newblk->mp, &gfs2mp, sizeof(struct metapath)); newblk->height -= di_height - gfs2_hgt; if (len) fix_jdatatree(sbp, ip, newblk, newblk->ptrbuf, len); free(newblk->ptrbuf); free(newblk); } return 0; } static int adjust_indirect_blocks(struct gfs2_sbd *sbp, struct gfs2_inode *ip) { uint64_t dinode_size; uint32_t gfs2_hgt, di_height; osi_list_t *tmp=NULL, *x; struct blocklist blocks, *blk; int error = 0; int isdir = S_ISDIR(ip->i_di.di_mode); /* is always jdata */ int isjdata = ((GFS2_DIF_JDATA & ip->i_di.di_flags) && !isdir); int isreg = (!isjdata && !isdir); /* regular files and dirs are same upto height=2 jdata files (not dirs) are same only when height=0 */ if (((isreg||isdir) && ip->i_di.di_height <= 1) || (isjdata && ip->i_di.di_height == 0)) return 0; /* nothing to do */ 
osi_list_init(&blocks.list); error = get_inode_metablocks(sbp, ip, &blocks); if (error) goto out; /* The gfs2 height may be different. We need to rebuild the metadata tree to the gfs2 height. */ gfs2_hgt = calc_gfs2_tree_height(ip, ip->i_di.di_size); /* Save off the size because we're going to empty the contents and add the data blocks back in later. */ dinode_size = ip->i_di.di_size; ip->i_di.di_size = 0ULL; di_height = ip->i_di.di_height; ip->i_di.di_height = 0; /* Now run through the block list a second time. If the block is a data block, rewrite the data to the gfs2 offset. */ osi_list_foreach_safe(tmp, &blocks.list, x) { blk = osi_list_entry(tmp, struct blocklist, list); /* If it's not metadata that holds data block pointers (i.e. metadata pointing to other metadata) */ if (blk->height != di_height - 1) { osi_list_del(tmp); free(blk->ptrbuf); free(blk); continue; } if (isreg || isdir) /* more or less same way to deal with either */ error = fix_ind_reg_or_dir(sbp, ip, di_height, gfs2_hgt, blk, &blocks); else if (isjdata) error = fix_ind_jdata(sbp, ip, di_height, gfs2_hgt, dinode_size, blk, &blocks); if (error) goto out; osi_list_del(tmp); free(blk->ptrbuf); free(blk); } ip->i_di.di_size = dinode_size; /* Set the new dinode height, which may or may not have changed. 
*/ /* The caller will take it from the ip and write it to the buffer */ ip->i_di.di_height = gfs2_hgt; return error; out: while (!osi_list_empty(&blocks.list)) { blk = osi_list_entry(tmp, struct blocklist, list); osi_list_del(&blocks.list); free(blk->ptrbuf); free(blk); } return error; } const char *cdpn[14] = {"{hostname}", "{mach}", "{os}", "{uid}", "{gid}", "{sys}", "{jid}", "@hostname", "@mach", "@os", "@uid", "@gid", "@sys", "@jid"}; static int has_cdpn(const char *str) { int i; for (i=0; i<14; i++) if (strstr(str, cdpn[i]) != NULL) return 1; return 0; } static int fix_cdpn_symlink(struct gfs2_sbd *sbp, struct gfs2_buffer_head *bh, struct gfs2_inode *ip) { char *linkptr = NULL; if (ip->i_di.di_height != 0) return 0; linkptr = bh->b_data + sizeof(struct gfs_dinode); if (has_cdpn(linkptr)) { struct inode_dir_block *fix; /* Save the symlink di_addr. We'll find the parent di_addr later */ fix = malloc(sizeof(struct inode_dir_block)); if (!fix) { log_crit(_("Error: out of memory.\n")); return -1; } memset(fix, 0, sizeof(struct inode_dir_block)); fix->di_addr = ip->i_di.di_num.no_addr; osi_list_add_prev((osi_list_t *)&fix->list, (osi_list_t *)&cdpns_to_fix); } return 0; } /* * fix_xattr - * Extended attributes can be either direct (in the ip->i_di.di_eattr block) or * then can be at a maximum of 1 indirect level. Multiple levels of indirection * are not supported. If the di_eattr block contains extended attribute data, * i.e block type = GFS_METATYPE_EA, we ignore it. * If the di_eattr block contains block pointers to extended attributes we need * to fix the header. gfs1 uses gfs_indirect as the header which is 64 bytes * bigger than gfs2_meta_header that gfs2 uses. 
*/ static int fix_xattr(struct gfs2_sbd *sbp, struct gfs2_buffer_head *bh, struct gfs2_inode *ip) { int len, old_hdr_sz, new_hdr_sz; struct gfs2_buffer_head *eabh; char *buf; /* Read in the i_di.di_eattr block */ eabh = bread(sbp, ip->i_di.di_eattr); if (!gfs2_check_meta(eabh, GFS_METATYPE_IN)) {/* if it is an indirect block */ len = sbp->bsize - sizeof(struct gfs_indirect); buf = malloc(len); if (!buf) { /*FIXME: Same message as fix_cdpn_symlink */ log_crit(_("Error: out of memory.\n")); return -1; } old_hdr_sz = sizeof(struct gfs_indirect); new_hdr_sz = sizeof(struct gfs2_meta_header); memcpy(buf, eabh->b_data + old_hdr_sz, sbp->bsize - old_hdr_sz); memset(eabh->b_data + new_hdr_sz, 0, sbp->bsize - new_hdr_sz); memcpy(eabh->b_data + new_hdr_sz, buf, len); free(buf); bmodified(eabh); } brelse(eabh); return 0; } /* ------------------------------------------------------------------------- */ /* adjust_inode - change an inode from gfs1 to gfs2 */ /* */ /* Returns: 0 on success, -1 on failure */ /* ------------------------------------------------------------------------- */ static int adjust_inode(struct gfs2_sbd *sbp, struct gfs2_buffer_head *bh) { struct gfs2_inode *inode; struct inode_block *fixdir; int inode_was_gfs1; inode = lgfs2_gfs_inode_get(sbp, bh); if (inode == NULL) { log_crit(_("Error reading inode: %s\n"), strerror(errno)); return -1; } inode_was_gfs1 = (inode->i_di.di_num.no_formal_ino == inode->i_di.di_num.no_addr); /* Fix the inode number: */ inode->i_di.di_num.no_formal_ino = sbp->md.next_inum; /* Fix the inode type: gfs1 uses di_type, gfs2 uses di_mode. */ inode->i_di.di_mode &= ~S_IFMT; switch (inode->i_di.__pad1) { /* formerly di_type */ case GFS_FILE_DIR: /* directory */ inode->i_di.di_mode |= S_IFDIR; /* Add this directory to the list of dirs to fix later. 
*/ fixdir = malloc(sizeof(struct inode_block)); if (!fixdir) { /*FIXME: Same message as fix_cdpn_symlink */ log_crit(_("Error: out of memory.\n")); return -1; } memset(fixdir, 0, sizeof(struct inode_block)); fixdir->di_addr = inode->i_di.di_num.no_addr; osi_list_add_prev((osi_list_t *)&fixdir->list, (osi_list_t *)&dirs_to_fix); break; case GFS_FILE_REG: /* regular file */ inode->i_di.di_mode |= S_IFREG; break; case GFS_FILE_LNK: /* symlink */ inode->i_di.di_mode |= S_IFLNK; break; case GFS_FILE_BLK: /* block device */ inode->i_di.di_mode |= S_IFBLK; break; case GFS_FILE_CHR: /* character device */ inode->i_di.di_mode |= S_IFCHR; break; case GFS_FILE_FIFO: /* fifo / pipe */ inode->i_di.di_mode |= S_IFIFO; break; case GFS_FILE_SOCK: /* socket */ inode->i_di.di_mode |= S_IFSOCK; break; } /* ----------------------------------------------------------- */ /* gfs2 inodes are slightly different from gfs1 inodes in that */ /* di_goal_meta has shifted locations and di_goal_data has */ /* changed from 32-bits to 64-bits. The following code */ /* adjusts for the shift. */ /* */ /* Note: It may sound absurd, but we need to check if this */ /* inode has already been converted to gfs2 or if it's */ /* still a gfs1 inode. That's just in case there was a */ /* prior attempt to run gfs2_convert that never finished */ /* (due to power out, ctrl-c, kill, segfault, whatever.) */ /* If it is unconverted gfs1 we want to do a full */ /* conversion. If it's a gfs2 inode from a prior run, */ /* we still need to renumber the inode, but here we */ /* don't want to shift the data around. 
*/ /* ----------------------------------------------------------- */ if (inode_was_gfs1) { struct gfs_dinode *gfs1_dinode_struct; int ret = 0; gfs1_dinode_struct = (struct gfs_dinode *)&inode->i_di; inode->i_di.di_goal_meta = inode->i_di.di_goal_data; inode->i_di.di_goal_data = 0; /* make sure the upper 32b are 0 */ inode->i_di.di_goal_data = gfs1_dinode_struct->di_goal_dblk; inode->i_di.di_generation = 0; if (adjust_indirect_blocks(sbp, inode)) return -1; /* Check for cdpns */ if (S_ISLNK(inode->i_di.di_mode)) { ret = fix_cdpn_symlink(sbp, bh, inode); if (ret) return -1; } /* Check for extended attributes */ if (inode->i_di.di_eattr) { ret = fix_xattr(sbp, bh, inode); if (ret) return -1; } } bmodified(inode->i_bh); inode_put(&inode); /* does gfs2_dinode_out if modified */ sbp->md.next_inum++; /* update inode count */ return 0; } /* adjust_inode */ static int next_rg_meta(struct rgrp_tree *rgd, uint64_t *block, int first) { struct gfs2_bitmap *bits = NULL; uint32_t length = rgd->ri.ri_length; uint32_t blk = (first)? 
0: (uint32_t)((*block + 1) - rgd->ri.ri_data0); int i; if (!first && (*block < rgd->ri.ri_data0)) { fprintf(stderr, "next_rg_meta: Start block is outside rgrp bounds.\n"); exit(1); } for (i = 0; i < length; i++){ bits = &rgd->bits[i]; if (blk < bits->bi_len * GFS2_NBBY) break; blk -= bits->bi_len * GFS2_NBBY; } for (; i < length; i++){ bits = &rgd->bits[i]; blk = gfs2_bitfit((unsigned char *)rgd->bh[i]->b_data + bits->bi_offset, bits->bi_len, blk, GFS2_BLKST_DINODE); if(blk != BFITNOENT){ *block = blk + (bits->bi_start * GFS2_NBBY) + rgd->ri.ri_data0; break; } blk = 0; } if (i == length) return -1; return 0; } static int next_rg_metatype(struct gfs2_sbd *sdp, struct rgrp_tree *rgd, uint64_t *block, uint32_t type, int first) { struct gfs2_buffer_head *bh = NULL; do{ if (bh) brelse(bh); if (next_rg_meta(rgd, block, first)) return -1; bh = bread(sdp, *block); first = 0; } while(gfs2_check_meta(bh, type)); brelse(bh); return 0; } /* ------------------------------------------------------------------------- */ /* inode_renumber - renumber the inodes */ /* */ /* In gfs1, the inode number WAS the inode address. In gfs2, the inodes are */ /* numbered sequentially. */ /* */ /* Returns: 0 on success, -1 on failure */ /* ------------------------------------------------------------------------- */ static int inode_renumber(struct gfs2_sbd *sbp, uint64_t root_inode_addr, osi_list_t *cdpn_to_fix) { struct rgrp_tree *rgd; struct osi_node *n, *next = NULL; uint64_t block = 0; struct gfs2_buffer_head *bh; int first; int error = 0; int rgs_processed = 0; log_notice(_("Converting inodes.\n")); sbp->md.next_inum = 1; /* starting inode numbering */ gettimeofday(&tv, NULL); seconds = tv.tv_sec; /* ---------------------------------------------------------------- */ /* Traverse the resource groups to figure out where the inodes are. 
*/ /* ---------------------------------------------------------------- */ for (n = osi_first(&sbp->rgtree); n; n = next) { next = osi_next(n); rgd = (struct rgrp_tree *)n; rgs_processed++; first = 1; while (1) { /* for all inodes in the resource group */ gettimeofday(&tv, NULL); /* Put out a warm, fuzzy message every second so the customer */ /* doesn't think we hung. (This may take a long time). */ if (tv.tv_sec - seconds) { seconds = tv.tv_sec; log_notice(_("\r%llu inodes from %d rgs converted."), (unsigned long long)sbp->md.next_inum, rgs_processed); fflush(stdout); } /* Get the next metadata block. Break out if we reach the end. */ /* We have to check all metadata blocks because the bitmap may */ /* be "11" (used meta) for both inodes and indirect blocks. */ /* We need to process the inodes and change the indirect blocks */ /* to have a bitmap type of "01" (data). */ if (next_rg_metatype(sbp, rgd, &block, 0, first)) break; /* If this is the root inode block, remember it for later: */ if (block == root_inode_addr) { sbp->sd_sb.sb_root_dir.no_addr = block; sbp->sd_sb.sb_root_dir.no_formal_ino = sbp->md.next_inum; } bh = bread(sbp, block); if (!gfs2_check_meta(bh, GFS_METATYPE_DI)) {/* if it is an dinode */ /* Skip the rindex and jindex inodes for now. */ if (block != rindex_addr && block != jindex_addr) error = adjust_inode(sbp, bh); if (error) { return error; } } else { /* It's metadata, but not an inode, so fix the bitmap. */ int blk, buf_offset; int bitmap_byte; /* byte within the bitmap to fix */ int byte_bit; /* bit within the byte */ /* Figure out the absolute bitmap byte we need to fix. */ /* ignoring structure offsets and bitmap blocks for now. */ bitmap_byte = (block - rgd->ri.ri_data0) / GFS2_NBBY; byte_bit = (block - rgd->ri.ri_data0) % GFS2_NBBY; /* Now figure out which bitmap block the byte is on */ for (blk = 0; blk < rgd->ri.ri_length; blk++) { /* figure out offset of first bitmap byte for this map: */ buf_offset = (blk) ? 
sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_rgrp); /* if it's on this page */ if (buf_offset + bitmap_byte < sbp->bsize) { rgd->bh[blk]->b_data[buf_offset + bitmap_byte] &= ~(0x03 << (GFS2_BIT_SIZE * byte_bit)); rgd->bh[blk]->b_data[buf_offset + bitmap_byte] |= (0x01 << (GFS2_BIT_SIZE * byte_bit)); bmodified(rgd->bh[blk]); break; } bitmap_byte -= (sbp->bsize - buf_offset); } } brelse(bh); first = 0; } /* while 1 */ } /* for all rgs */ log_notice(_("\r%llu inodes from %d rgs converted."), (unsigned long long)sbp->md.next_inum, rgs_processed); fflush(stdout); return 0; }/* inode_renumber */ /* ------------------------------------------------------------------------- */ /* fetch_inum - fetch an inum entry from disk, given its block */ /* ------------------------------------------------------------------------- */ static int fetch_inum(struct gfs2_sbd *sbp, uint64_t iblock, struct gfs2_inum *inum, uint64_t *eablk) { struct gfs2_inode *fix_inode; fix_inode = lgfs2_inode_read(sbp, iblock); if (fix_inode == NULL) return 1; inum->no_formal_ino = fix_inode->i_di.di_num.no_formal_ino; inum->no_addr = fix_inode->i_di.di_num.no_addr; if (eablk) *eablk = fix_inode->i_di.di_eattr; inode_put(&fix_inode); return 0; }/* fetch_inum */ /* ------------------------------------------------------------------------- */ /* process_dirent_info - fix one dirent (directory entry) buffer */ /* */ /* We changed inode numbers, so we must update that number into the */ /* directory entries themselves. 
*/
/*                                                                           */
/* Returns: 0 on success, -1 on failure, -EISDIR when dentmod marked DT_DIR  */
/* ------------------------------------------------------------------------- */
/*
 * Two modes, selected by dentmod:
 *  - dentmod == 0: every dirent in bh gets its formal inode number refreshed
 *    from the inode it points at, and gfs1 file types are mapped to DT_*.
 *  - dentmod != 0: only the DT_LNK dirent whose address equals dentmod is
 *    touched; it is re-typed as DT_DIR and -EISDIR is returned.
 * dir_entries is accepted but not used (see the note inside the loop).
 */
static int process_dirent_info(struct gfs2_inode *dip, struct gfs2_sbd *sbp,
			       struct gfs2_buffer_head *bh, int dir_entries, uint64_t dentmod)
{
	int error = 0;
	struct gfs2_dirent *dent;
	int de; /* directory entry index */

	error = gfs2_dirent_first(dip, bh, &dent);
	if (error != IS_LEAF && error != IS_DINODE) {
		log_crit(_("Error retrieving directory.\n"));
		return -1;
	}
	error = 0;
	/* Go through every dirent in the buffer and process it. */
	/* Turns out you can't trust dir_entries is correct.     */
	for (de = 0; ; de++) {
		struct gfs2_inum inum;
		int dent_was_gfs1;

		if (dentmod) {
			/* de_inum is stored big-endian; convert it to CPU order */
			/* before comparing with the CPU-order dentmod.          */
			if (dent->de_type == cpu_to_be16(DT_LNK)
			    && be64_to_cpu(dent->de_inum.no_addr) == dentmod) {
				dent->de_type = cpu_to_be16(DT_DIR);
				error = -EISDIR;
				break;
			}
			goto skip_next;
		}

		gettimeofday(&tv, NULL);
		/* Do more warm fuzzy stuff for the customer. */
		dirents_fixed++;
		if (tv.tv_sec - seconds) {
			seconds = tv.tv_sec;
			log_notice(_("\r%llu directories, %llu dirents fixed."),
				   (unsigned long long)dirs_fixed,
				   (unsigned long long)dirents_fixed);
			fflush(stdout);
		}
		/* fix the dirent's inode number based on the inode */
		gfs2_inum_in(&inum, (char *)&dent->de_inum);
		/* Both fields are still in disk byte order here; an equality */
		/* test gives the same answer in either byte order.           */
		dent_was_gfs1 = (dent->de_inum.no_addr == dent->de_inum.no_formal_ino);
		if (inum.no_formal_ino) { /* if not a sentinel (placeholder) */
			error = fetch_inum(sbp, inum.no_addr, &inum, NULL);
			if (error) {
				log_crit(_("Error retrieving inode 0x%llx\n"),
					 (unsigned long long)inum.no_addr);
				/* fetch_inum reports failure as 1; return the */
				/* documented failure value instead.           */
				error = -1;
				break;
			}
			/* fix the dirent's inode number from the fetched inum. */
			dent->de_inum.no_formal_ino = cpu_to_be64(inum.no_formal_ino);
		}
		/* Fix the dirent's filename hash: They are the same as gfs1 */
		/* dent->de_hash = cpu_to_be32(gfs2_disk_hash((char *)(dent + 1), */
		/*                             be16_to_cpu(dent->de_name_len))); */
		/* Fix the dirent's file type.  Gfs1 used home-grown values.  */
		/* Gfs2 uses standard values from include/linux/fs.h          */
		/* Only do this if the dent was a true gfs1 dent, and not a   */
		/* gfs2 dent converted from a previously aborted run.         */
		if (dent_was_gfs1) {
			switch (be16_to_cpu(dent->de_type)) {
			case GFS_FILE_NON:
				dent->de_type = cpu_to_be16(DT_UNKNOWN);
				break;
			case GFS_FILE_REG:    /* regular file */
				dent->de_type = cpu_to_be16(DT_REG);
				break;
			case GFS_FILE_DIR:    /* directory */
				dent->de_type = cpu_to_be16(DT_DIR);
				break;
			case GFS_FILE_LNK:    /* link */
				dent->de_type = cpu_to_be16(DT_LNK);
				break;
			case GFS_FILE_BLK:    /* block device node */
				dent->de_type = cpu_to_be16(DT_BLK);
				break;
			case GFS_FILE_CHR:    /* character device node */
				dent->de_type = cpu_to_be16(DT_CHR);
				break;
			case GFS_FILE_FIFO:   /* fifo/pipe */
				dent->de_type = cpu_to_be16(DT_FIFO);
				break;
			case GFS_FILE_SOCK:   /* socket */
				dent->de_type = cpu_to_be16(DT_SOCK);
				break;
			}
		}
		/*
		 * Compare this dirent address with every one in the
		 * cdpns_to_fix list to find if this directory (dip) is
		 * a cdpn symlink's parent. If so add it to the list element
		 */
		if (dent->de_type == cpu_to_be16(DT_LNK)) {
			osi_list_t *tmp;
			struct inode_dir_block *fix;

			osi_list_foreach(tmp, &cdpns_to_fix.list) {
				fix = osi_list_entry(tmp, struct inode_dir_block, list);
				if (fix->di_addr == inum.no_addr)
					fix->di_paddr = dip->i_di.di_num.no_addr;
			}
		}
	skip_next:
		error = gfs2_dirent_next(dip, bh, &dent);
		if (error) {
			if (error == -ENOENT) /* beyond the end of this bh */
				error = 0;
			break;
		}
	} /* for every directory entry */
	return error;
}/* process_dirent_info */

/* ------------------------------------------------------------------------- */
/* fix_one_directory_exhash - fix one directory's inode numbers.             */
/*                                                                           */
/* This is for exhash directories, where the inode has a list of "leaf"      */
/* blocks, each of which is a buffer full of dirents that must be processed.
*/ /* */ /* Returns: 0 on success, -1 on failure */ /* ------------------------------------------------------------------------- */ static int fix_one_directory_exhash(struct gfs2_sbd *sbp, struct gfs2_inode *dip, uint64_t dentmod) { struct gfs2_buffer_head *bh_leaf; int error; uint64_t leaf_block, prev_leaf_block; uint32_t leaf_num; prev_leaf_block = 0; /* for all the leafs, get the leaf block and process the dirents inside */ for (leaf_num = 0; ; leaf_num++) { uint64_t buf; struct gfs2_leaf leaf; error = gfs2_readi(dip, (char *)&buf, leaf_num * sizeof(uint64_t), sizeof(uint64_t)); if (!error) /* end of file */ return 0; /* success */ else if (error != sizeof(uint64_t)) { log_crit(_("fix_one_directory_exhash: error reading directory.\n")); return -1; } else { leaf_block = be64_to_cpu(buf); error = 0; } leaf_chain: /* leaf blocks may be repeated, so skip the duplicates: */ if (leaf_block == prev_leaf_block) /* same block? */ continue; /* already converted */ prev_leaf_block = leaf_block; /* read the leaf buffer in */ error = gfs2_get_leaf(dip, leaf_block, &bh_leaf); if (error) { log_crit(_("Error reading leaf %llx\n"), (unsigned long long)leaf_block); break; } gfs2_leaf_in(&leaf, bh_leaf); /* buffer to structure */ error = process_dirent_info(dip, sbp, bh_leaf, leaf.lf_entries, dentmod); bmodified(bh_leaf); brelse(bh_leaf); if (dentmod && error == -EISDIR) /* dentmod was marked DT_DIR, break out */ break; if (leaf.lf_next) { /* leaf has a leaf chain, process leaves in chain */ leaf_block = leaf.lf_next; error = 0; goto leaf_chain; } } /* for leaf_num */ return 0; }/* fix_one_directory_exhash */ static int process_directory(struct gfs2_sbd *sbp, uint64_t dirblock, uint64_t dentmod) { struct gfs2_inode *dip; int error = 0; /* read in the directory inode */ dip = lgfs2_inode_read(sbp, dirblock); if (dip == NULL) return -1; /* fix the directory: either exhash (leaves) or linear (stuffed) */ if (dip->i_di.di_flags & GFS2_DIF_EXHASH) { if (fix_one_directory_exhash(sbp, 
dip, dentmod)) { log_crit(_("Error fixing exhash directory.\n")); inode_put(&dip); return -1; } } else { error = process_dirent_info(dip, sbp, dip->i_bh, dip->i_di.di_entries, dentmod); if (error && error != -EISDIR) { log_crit(_("Error fixing linear directory.\n")); inode_put(&dip); return -1; } } bmodified(dip->i_bh); inode_put(&dip); return 0; } /* ------------------------------------------------------------------------- */ /* fix_directory_info - sync new inode numbers with directory info */ /* Returns: 0 on success, -1 on failure */ /* ------------------------------------------------------------------------- */ static int fix_directory_info(struct gfs2_sbd *sbp, osi_list_t *dir_to_fix) { osi_list_t *tmp, *fix; struct inode_block *dir_iblk; uint64_t dirblock; uint32_t gfs1_inptrs = sbp->sd_inptrs; /* Directory inodes have been converted to gfs2, use gfs2 inptrs */ sbp->sd_inptrs = (sbp->bsize - sizeof(struct gfs2_meta_header)) / sizeof(uint64_t); dirs_fixed = 0; dirents_fixed = 0; gettimeofday(&tv, NULL); seconds = tv.tv_sec; log_notice(_("\nFixing file and directory information.\n")); fflush(stdout); tmp = NULL; /* for every directory in the list */ for (fix = dir_to_fix->next; fix != dir_to_fix; fix = fix->next) { if (tmp) { osi_list_del(tmp); free(tmp); } tmp = fix; /* remember the addr to free next time */ dirs_fixed++; /* figure out the directory inode block and read it in */ dir_iblk = (struct inode_block *)fix; dirblock = dir_iblk->di_addr; /* addr of dir inode */ if (process_directory(sbp, dirblock, 0)) { log_crit(_("Error processing directory\n")); return -1; } } /* Free the last entry in memory: */ if (tmp) { osi_list_del(tmp); free(tmp); } sbp->sd_inptrs = gfs1_inptrs; return 0; }/* fix_directory_info */ /* ------------------------------------------------------------------------- */ /* fix_cdpn_symlinks - convert cdpn symlinks to empty directories */ /* Returns: 0 on success, -1 on failure */ /* 
------------------------------------------------------------------------- */ static int fix_cdpn_symlinks(struct gfs2_sbd *sbp, osi_list_t *cdpn_to_fix) { osi_list_t *tmp, *x; int error = 0; cdpns_fixed = 0; osi_list_foreach_safe(tmp, cdpn_to_fix, x) { struct gfs2_inum fix, dir; struct inode_dir_block *l_fix; struct gfs2_buffer_head *bh; struct gfs2_inode *fix_inode; uint64_t eablk; l_fix = osi_list_entry(tmp, struct inode_dir_block, list); osi_list_del(tmp); /* convert symlink to empty dir */ error = fetch_inum(sbp, l_fix->di_addr, &fix, &eablk); if (error) { log_crit(_("Error retrieving inode at block %llx\n"), (unsigned long long)l_fix->di_addr); break; } error = fetch_inum(sbp, l_fix->di_paddr, &dir, NULL); if (error) { log_crit(_("Error retrieving inode at block %llx\n"), (unsigned long long)l_fix->di_paddr); break; } /* initialize the symlink inode to be a directory */ bh = init_dinode(sbp, &fix, S_IFDIR | 0755, 0, &dir); fix_inode = lgfs2_inode_get(sbp, bh); if (fix_inode == NULL) return -1; fix_inode->i_di.di_eattr = eablk; /*fix extended attribute */ inode_put(&fix_inode); bmodified(bh); brelse(bh); /* fix the parent directory dirent entry for this inode */ error = process_directory(sbp, l_fix->di_paddr, l_fix->di_addr); if (error) { log_crit(_("Error trying to fix cdpn dentry\n")); break; } free(l_fix); cdpns_fixed++; } return error; } /* fix_cdpn_symlinks */ /* ------------------------------------------------------------------------- */ /* Fetch gfs1 jindex structure from buffer */ /* ------------------------------------------------------------------------- */ static void gfs1_jindex_in(struct gfs1_jindex *jindex, char *buf) { struct gfs1_jindex *str = (struct gfs1_jindex *)buf; jindex->ji_addr = be64_to_cpu(str->ji_addr); jindex->ji_nsegment = be32_to_cpu(str->ji_nsegment); memset(jindex->ji_reserved, 0, 64); } /* ------------------------------------------------------------------------- */ /* read_gfs1_jiindex - read the gfs1 jindex file. 
*/ /* Returns: 0 on success, -1 on failure */ /* ------------------------------------------------------------------------- */ static int read_gfs1_jiindex(struct gfs2_sbd *sdp) { struct gfs2_inode *ip = sdp->md.jiinode; char buf[sizeof(struct gfs1_jindex)]; unsigned int j; int error=0; unsigned int tmp_mode = 0; if(ip->i_di.di_size % sizeof(struct gfs1_jindex) != 0){ log_crit(_("The size reported in the journal index" " inode is not a\n" "\tmultiple of the size of a journal index.\n")); return -1; } if(!(sd_jindex = (struct gfs1_jindex *)malloc(ip->i_di.di_size))) { log_crit(_("Unable to allocate journal index\n")); return -1; } if(!memset(sd_jindex, 0, ip->i_di.di_size)) { log_crit(_("Unable to zero journal index\n")); return -1; } /* ugly hack * Faking the gfs1_jindex inode as a directory to gfs2_readi * so it skips the metaheader struct in the data blocks * in the inode. gfs2_jindex inode doesn't have metaheaders * in the data blocks */ tmp_mode = ip->i_di.di_mode; ip->i_di.di_mode &= ~S_IFMT; ip->i_di.di_mode |= S_IFDIR; for (j = 0; ; j++) { struct gfs1_jindex *journ; error = gfs2_readi(ip, buf, j * sizeof(struct gfs1_jindex), sizeof(struct gfs1_jindex)); if(!error) break; if (error != sizeof(struct gfs1_jindex)){ log_crit(_("An error occurred while reading the" " journal index file.\n")); goto fail; } journ = sd_jindex + j; gfs1_jindex_in(journ, buf); sdp->jsize = (journ->ji_nsegment * 16 * sdp->bsize) >> 20; } ip->i_di.di_mode = tmp_mode; if(j * sizeof(struct gfs1_jindex) != ip->i_di.di_size){ log_crit(_("journal inode size invalid\n")); goto fail; } sdp->md.journals = sdp->orig_journals = j; return 0; fail: free(sd_jindex); return -1; } /* ------------------------------------------------------------------------- */ /* init - initialization code */ /* Returns: 0 on success, -1 on failure */ /* ------------------------------------------------------------------------- */ static int init(struct gfs2_sbd *sbp) { struct gfs2_buffer_head *bh; int rgcount; struct 
gfs2_inum inum; memset(sbp, 0, sizeof(struct gfs2_sbd)); if ((sbp->device_fd = open(device, O_RDWR)) < 0) { perror(device); exit(-1); } /* --------------------------------- */ /* initialize the incore superblock */ /* --------------------------------- */ sbp->sd_sb.sb_header.mh_magic = GFS2_MAGIC; sbp->sd_sb.sb_header.mh_type = GFS2_METATYPE_SB; sbp->sd_sb.sb_header.mh_format = GFS2_FORMAT_SB; osi_list_init((osi_list_t *)&dirs_to_fix); osi_list_init((osi_list_t *)&cdpns_to_fix); /* ---------------------------------------------- */ /* Initialize lists and read in the superblock. */ /* ---------------------------------------------- */ sbp->jsize = GFS2_DEFAULT_JSIZE; sbp->rgsize = GFS2_DEFAULT_RGSIZE; sbp->qcsize = GFS2_DEFAULT_QCSIZE; sbp->time = time(NULL); sbp->blks_total = 0; /* total blocks - total them up later */ sbp->blks_alloced = 0; /* blocks allocated - total them up later */ sbp->dinodes_alloced = 0; /* dinodes allocated - total them up later */ sbp->sd_sb.sb_bsize = GFS2_DEFAULT_BSIZE; sbp->bsize = sbp->sd_sb.sb_bsize; sbp->rgtree.osi_node = NULL; if (compute_constants(sbp)) { log_crit(_("Error: Bad constants (1)\n")); exit(-1); } bh = bread(sbp, GFS2_SB_ADDR >> sbp->sd_fsb2bb_shift); memcpy(&raw_gfs1_ondisk_sb, (struct gfs1_sb *)bh->b_data, sizeof(struct gfs1_sb)); gfs2_sb_in(&sbp->sd_sb, bh); jindex_addr = be64_to_cpu(raw_gfs1_ondisk_sb.sb_jindex_di.no_addr); rindex_addr = be64_to_cpu(raw_gfs1_ondisk_sb.sb_rindex_di.no_addr); sbp->bsize = sbp->sd_sb.sb_bsize; sbp->fssize = lseek(sbp->device_fd, 0, SEEK_END) / sbp->sd_sb.sb_bsize; sbp->sd_inptrs = (sbp->bsize - sizeof(struct gfs_indirect)) / sizeof(uint64_t); sbp->sd_diptrs = (sbp->bsize - sizeof(struct gfs_dinode)) / sizeof(uint64_t); sbp->sd_jbsize = sbp->bsize - sizeof(struct gfs2_meta_header); brelse(bh); if (compute_heightsize(sbp, sbp->sd_heightsize, &sbp->sd_max_height, sbp->bsize, sbp->sd_diptrs, sbp->sd_inptrs)) { log_crit(_("Error: Bad constants (1)\n")); exit(-1); } if 
(compute_heightsize(sbp, sbp->sd_jheightsize, &sbp->sd_max_jheight, sbp->sd_jbsize, sbp->sd_diptrs, sbp->sd_inptrs)) { log_crit(_("Error: Bad constants (1)\n")); exit(-1); } /* -------------------------------------------------------- */ /* Our constants are for gfs1. Need some for gfs2 as well. */ /* -------------------------------------------------------- */ gfs2_inptrs = (sbp->bsize - sizeof(struct gfs2_meta_header)) / sizeof(uint64_t); /* How many ptrs can we fit on a block? */ memset(gfs2_heightsize, 0, sizeof(gfs2_heightsize)); if (compute_heightsize(sbp, gfs2_heightsize, &gfs2_max_height, sbp->bsize, sbp->sd_diptrs, gfs2_inptrs)) { log_crit(_("Error: Bad constants (1)\n")); exit(-1); } memset(gfs2_jheightsize, 0, sizeof(gfs2_jheightsize)); if (compute_heightsize(sbp, gfs2_jheightsize, &gfs2_max_jheight, sbp->sd_jbsize, sbp->sd_diptrs, gfs2_inptrs)) { log_crit(_("Error: Bad constants (1)\n")); exit(-1); } /* ---------------------------------------------- */ /* Make sure we're really gfs1 */ /* ---------------------------------------------- */ if (sbp->sd_sb.sb_fs_format != GFS_FORMAT_FS || sbp->sd_sb.sb_header.mh_type != GFS_METATYPE_SB || sbp->sd_sb.sb_header.mh_format != GFS_FORMAT_SB || sbp->sd_sb.sb_multihost_format != GFS_FORMAT_MULTI) { log_crit(_("Error: %s does not look like a gfs1 filesystem.\n"), device); close(sbp->device_fd); exit(-1); } /* get gfs1 rindex inode - gfs1's rindex inode ptr became __pad2 */ gfs2_inum_in(&inum, (char *)&raw_gfs1_ondisk_sb.sb_rindex_di); sbp->md.riinode = lgfs2_gfs_inode_read(sbp, inum.no_addr); if (sbp->md.riinode == NULL) { log_crit(_("Could not read resource group index: %s\n"), strerror(errno)); exit(-1); } /* get gfs1 jindex inode - gfs1's journal index inode ptr became master */ gfs2_inum_in(&inum, (char *)&raw_gfs1_ondisk_sb.sb_jindex_di); sbp->md.jiinode = lgfs2_inode_read(sbp, inum.no_addr); if (sbp->md.jiinode == NULL) { log_crit(_("Could not read journal index: %s\n"), strerror(errno)); exit(-1); } /* read in 
the journal index data */ read_gfs1_jiindex(sbp); /* read in the resource group index data: */ /* We've got a slight dilemma here. In gfs1, we used to have a meta */ /* header in front of the rgindex pages. In gfs2, we don't. That's */ /* apparently only for directories. So we need to fake out libgfs2 */ /* so that it adjusts for the metaheader by faking out the inode to */ /* look like a directory, temporarily. */ sbp->md.riinode->i_di.di_mode &= ~S_IFMT; sbp->md.riinode->i_di.di_mode |= S_IFDIR; printf(_("Examining file system")); if (gfs1_ri_update(sbp, 0, &rgcount, 0)){ log_crit(_("Unable to fill in resource group information.\n")); return -1; } printf("\n"); fflush(stdout); inode_put(&sbp->md.riinode); inode_put(&sbp->md.jiinode); log_debug(_("%d rgs found.\n"), rgcount); return 0; }/* fill_super_block */ /* ------------------------------------------------------------------------- */ /* give_warning - give the all-important warning message. */ /* ------------------------------------------------------------------------- */ static void give_warning(void) { printf(_("This program will convert a gfs1 filesystem to a " \ "gfs2 filesystem.\n")); printf(_("WARNING: This can't be undone. It is strongly advised " \ "that you:\n\n")); printf(_(" 1. Back up your entire filesystem first.\n")); printf(_(" 2. Run fsck.gfs2 first to ensure filesystem integrity.\n")); printf(_(" 3. Make sure the filesystem is NOT mounted from any node.\n")); printf(_(" 4. 
Make sure you have the latest software versions.\n")); }/* give_warning */ /* ------------------------------------------------------------------------- */ /* version - print version information */ /* ------------------------------------------------------------------------- */ static void version(void) { log_notice(_("gfs2_convert version %s (built %s %s)\n"), VERSION, __DATE__, __TIME__); log_notice("%s\n\n", REDHAT_COPYRIGHT); } /* ------------------------------------------------------------------------- */ /* usage - print usage information */ /* ------------------------------------------------------------------------- */ static void usage(const char *name) { give_warning(); printf(_("\nUsage:\n")); printf(_("%s [-hnqvVy] \n\n"), name); printf("Flags:\n"); printf(_("\th - print this help message\n")); printf(_("\tn - assume 'no' to all questions\n")); printf(_("\tq - quieter output\n")); printf(_("\tv - more verbose output\n")); printf(_("\tV - print version information\n")); printf(_("\ty - assume 'yes' to all questions\n")); }/* usage */ /* ------------------------------------------------------------------------- */ /* process_parameters */ /* ------------------------------------------------------------------------- */ static void process_parameters(int argc, char **argv, struct gfs2_options *opts) { int c; opts->yes = 0; opts->no = 0; if (argc == 1) { usage(argv[0]); exit(0); } memset(device, 0, sizeof(device)); while((c = getopt(argc, argv, "hnqvyV")) != -1) { switch(c) { case 'h': usage(argv[0]); exit(0); break; case 'n': opts->no = 1; break; case 'q': decrease_verbosity(); break; case 'v': increase_verbosity(); break; case 'V': exit(0); case 'y': opts->yes = 1; break; default: fprintf(stderr,_("Parameter not understood: %c\n"), c); usage(argv[0]); exit(0); } } if(argc > optind) { strcpy(device, argv[optind]); opts->device = device; if(!opts->device) { fprintf(stderr, _("No device specified. 
Please use '-h' for help\n")); exit(1); } } else { fprintf(stderr, _("No device specified. Please use '-h' for help\n")); exit(1); } } /* process_parameters */ /* ------------------------------------------------------------------------- */ /* rgrp_length - Calculate the length of a resource group */ /* @size: The total size of the resource group */ /* ------------------------------------------------------------------------- */ static uint64_t rgrp_length(uint64_t size, struct gfs2_sbd *sdp) { uint64_t bitbytes = RGRP_BITMAP_BLKS(&sdp->sd_sb) + 1; uint64_t stuff = RGRP_STUFFED_BLKS(&sdp->sd_sb) + 1; uint64_t blocks = 1; if (size >= stuff) { size -= stuff; while (size > bitbytes) { blocks++; size -= bitbytes; } if (size) blocks++; } return blocks; }/* rgrp_length */ /* ------------------------------------------------------------------------- */ /* journ_space_to_rg - convert gfs1 journal space to gfs2 rg space. */ /* */ /* In gfs1, the journals were kept separate from the files and directories. */ /* They had a dedicated section of the fs carved out for them. */ /* In gfs2, the journals are just files like any other, (but still hidden). */ /* Therefore, the old journal space has to be converted to normal resource */ /* group space. 
*/ /* */ /* Returns: 0 on success, -1 on failure */ /* ------------------------------------------------------------------------- */ static int journ_space_to_rg(struct gfs2_sbd *sdp) { int error = 0; int j, x; struct gfs1_jindex *jndx; struct rgrp_tree *rgd, *rgdhigh; struct osi_node *n, *next = NULL; struct gfs2_meta_header mh; uint64_t ri_addr; mh.mh_magic = GFS2_MAGIC; mh.mh_type = GFS2_METATYPE_RB; mh.mh_format = GFS2_FORMAT_RB; log_notice(_("Converting journal space to rg space.\n")); /* Go through each journal, converting them one by one */ for (j = 0; j < sdp->orig_journals; j++) { /* for each journal */ uint64_t size; jndx = &sd_jindex[j]; /* go through all rg index entries, keeping track of the highest that's still in the first subdevice. Note: we really should go through all of the rgindex because we might have had rg's added by gfs_grow, and journals added by jadd. gfs_grow adds rgs out of order, so we can't count on them being in ascending order. */ rgdhigh = NULL; for (n = osi_first(&sdp->rgtree); n; n = next) { next = osi_next(n); rgd = (struct rgrp_tree *)n; if (rgd->ri.ri_addr < jndx->ji_addr && ((rgdhigh == NULL) || (rgd->ri.ri_addr > rgdhigh->ri.ri_addr))) rgdhigh = rgd; } /* for each rg */ if (!rgdhigh) { /* if we somehow didn't find one. */ log_crit(_("Error: No suitable rg found for journal.\n")); return -1; } log_info(_("Addr 0x%llx comes after rg at addr 0x%llx\n"), (unsigned long long)jndx->ji_addr, (unsigned long long)rgdhigh->ri.ri_addr); ri_addr = jndx->ji_addr; /* Allocate a new rgd entry which includes rg and ri. 
*/ rgd = rgrp_insert(&sdp->rgtree, ri_addr); /* convert the gfs1 rgrp into a new gfs2 rgrp */ size = jndx->ji_nsegment * be32_to_cpu(raw_gfs1_ondisk_sb.sb_seg_size); rgd->rg.rg_header.mh_magic = GFS2_MAGIC; rgd->rg.rg_header.mh_type = GFS2_METATYPE_RG; rgd->rg.rg_header.mh_format = GFS2_FORMAT_RG; rgd->rg.rg_flags = 0; rgd->rg.rg_dinodes = 0; rgd->ri.ri_addr = jndx->ji_addr; /* new rg addr becomes ji addr */ rgd->ri.ri_length = rgrp_length(size, sdp); /* aka bitblocks */ rgd->ri.ri_data0 = jndx->ji_addr + rgd->ri.ri_length; rgd->ri.ri_data = size - rgd->ri.ri_length; /* Round down to nearest multiple of GFS2_NBBY */ while (rgd->ri.ri_data & 0x03) rgd->ri.ri_data--; sdp->blks_total += rgd->ri.ri_data; /* For statfs file update */ rgd->rg.rg_free = rgd->ri.ri_data; rgd->ri.ri_bitbytes = rgd->ri.ri_data / GFS2_NBBY; if(!(rgd->bh = (struct gfs2_buffer_head **) malloc(rgd->ri.ri_length * sizeof(struct gfs2_buffer_head *)))) return -1; if(!memset(rgd->bh, 0, rgd->ri.ri_length * sizeof(struct gfs2_buffer_head *))) { free(rgd->bh); return -1; } for (x = 0; x < rgd->ri.ri_length; x++) { rgd->bh[x] = bget(sdp, rgd->ri.ri_addr + x); memset(rgd->bh[x]->b_data, 0, sdp->bsize); } if (gfs2_compute_bitstructs(sdp->sd_sb.sb_bsize, rgd)) { log_crit(_("gfs2_convert: Error converting bitmaps.\n")); exit(-1); } convert_bitmaps(sdp, rgd); for (x = 0; x < rgd->ri.ri_length; x++) { if (x) gfs2_meta_header_out_bh(&mh, rgd->bh[x]); else gfs2_rgrp_out_bh(&rgd->rg, rgd->bh[x]); } } /* for each journal */ return error; }/* journ_space_to_rg */ /* ------------------------------------------------------------------------- */ /* update_inode_file - update the inode file with the new next_inum */ /* ------------------------------------------------------------------------- */ static void update_inode_file(struct gfs2_sbd *sdp) { struct gfs2_inode *ip = sdp->md.inum; uint64_t buf; int count; buf = cpu_to_be64(sdp->md.next_inum); count = gfs2_writei(ip, &buf, 0, sizeof(uint64_t)); if (count != 
sizeof(uint64_t)) { fprintf(stderr, "update_inode_file\n"); exit(1); } log_debug(_("\nNext Inum: %llu\n"), (unsigned long long)sdp->md.next_inum); }/* update_inode_file */ /* ------------------------------------------------------------------------- */ /* write_statfs_file - write the statfs file */ /* ------------------------------------------------------------------------- */ static void write_statfs_file(struct gfs2_sbd *sdp) { struct gfs2_inode *ip = sdp->md.statfs; struct gfs2_statfs_change sc; char buf[sizeof(struct gfs2_statfs_change)]; int count; sc.sc_total = sdp->blks_total; sc.sc_free = sdp->blks_total - sdp->blks_alloced; sc.sc_dinodes = sdp->dinodes_alloced; gfs2_statfs_change_out(&sc, buf); count = gfs2_writei(ip, buf, 0, sizeof(struct gfs2_statfs_change)); if (count != sizeof(struct gfs2_statfs_change)) { fprintf(stderr, "do_init (2)\n"); exit(1); } }/* write_statfs_file */ /* ------------------------------------------------------------------------- */ /* remove_obsolete_gfs1 - remove obsolete gfs1 inodes. 
*/ /* ------------------------------------------------------------------------- */ static void remove_obsolete_gfs1(struct gfs2_sbd *sbp) { struct gfs2_inum inum; log_notice(_("Removing obsolete GFS1 file system structures.\n")); fflush(stdout); /* Delete the old gfs1 Journal index: */ gfs2_inum_in(&inum, (char *)&raw_gfs1_ondisk_sb.sb_jindex_di); gfs2_freedi(sbp, inum.no_addr); /* Delete the old gfs1 rgindex: */ gfs2_inum_in(&inum, (char *)&raw_gfs1_ondisk_sb.sb_rindex_di); gfs2_freedi(sbp, inum.no_addr); /* Delete the old gfs1 Quota file: */ gfs2_inum_in(&inum, (char *)&raw_gfs1_ondisk_sb.sb_quota_di); gfs2_freedi(sbp, inum.no_addr); /* Delete the old gfs1 License file: */ gfs2_inum_in(&inum, (char *)&raw_gfs1_ondisk_sb.sb_license_di); gfs2_freedi(sbp, inum.no_addr); } /* ------------------------------------------------------------------------- */ /* lifted from libgfs2/structures.c */ /* ------------------------------------------------------------------------- */ static int conv_build_jindex(struct gfs2_sbd *sdp) { unsigned int j; sdp->md.jiinode = createi(sdp->master_dir, "jindex", S_IFDIR | 0700, GFS2_DIF_SYSTEM); if (sdp->md.jiinode == NULL) { return errno; } sdp->md.journal = malloc(sdp->md.journals * sizeof(struct gfs2_inode *)); if (sdp->md.journal == NULL) { return errno; } for (j = 0; j < sdp->md.journals; j++) { char name[256]; printf(_("Writing journal #%d..."), j + 1); fflush(stdout); sprintf(name, "journal%u", j); sdp->md.journal[j] = createi(sdp->md.jiinode, name, S_IFREG | 0600, GFS2_DIF_SYSTEM); write_journal(sdp, j, sdp->jsize << 20 >> sdp->sd_sb.sb_bsize_shift); inode_put(&sdp->md.journal[j]); printf(_("done.\n")); fflush(stdout); } free(sdp->md.journal); if (sdp->debug) { printf("\nJindex:\n"); gfs2_dinode_print(&sdp->md.jiinode->i_di); } inode_put(&sdp->md.jiinode); return 0; } static unsigned int total_file_blocks(struct gfs2_sbd *sdp, uint64_t filesize, int journaled) { unsigned int data_blks = 0, meta_blks = 0; unsigned int max, height, 
bsize; uint64_t *arr; /* Now find the total meta blocks required for data_blks */ if (filesize <= sdp->bsize - sizeof(struct gfs2_dinode)) { goto out; } if (journaled) { arr = sdp->sd_jheightsize; max = sdp->sd_max_jheight; bsize = sdp->sd_jbsize; } else { arr = sdp->sd_heightsize; max = sdp->sd_max_height; bsize = sdp->bsize; } data_blks = DIV_RU(filesize, bsize); /* total data blocks reqd */ for (height = 0; height < max; height++) if (arr[height] >= filesize) break; if (height == 1) { goto out; } meta_blks = DIV_RU(data_blks, sdp->sd_inptrs); out: return data_blks + meta_blks; } /* We check if the GFS2 filesystem files/structures created after the call to * check_fit() in main() will fit in the currently available free blocks */ static int check_fit(struct gfs2_sbd *sdp) { unsigned int blks_need = 0, blks_avail = sdp->blks_total - sdp->blks_alloced; /* build_master() */ blks_need++; /*creation of master dir inode - 1 block */ /* conv_build_jindex() */ { blks_need++; /* creation of 'jindex' disk inode */ /* creation of journals */ blks_need += sdp->md.journals * total_file_blocks(sdp, sdp->jsize << 20, 1); } /* build_per_node() */ { blks_need++; /* creation of 'per_node' dir inode */ /* njourn x (inum_range + statfs_change + quota_change inodes) */ blks_need += sdp->md.journals * 3; /* quota change inodes are prealloced */ blks_need += sdp->md.journals * total_file_blocks(sdp, sdp->qcsize << 20, 1); } /* build_inum() */ blks_need++; /* creation of 'inum' disk inode */ /* build_statfs() */ blks_need++; /* creation of 'statfs' disk inode */ /* build_rindex() */ { struct osi_node *n, *next = NULL; unsigned int rg_count = 0; blks_need++; /* creationg of 'rindex' disk inode */ /* find the total # of rindex entries, gives size of rindex inode */ for (n = osi_first(&sdp->rgtree); n; n = next) { next = osi_next(n); rg_count++; } blks_need += total_file_blocks(sdp, rg_count * sizeof(struct gfs2_rindex), 1); } /* build_quota() */ blks_need++; /* quota inode block and 
uid=gid=0 quota - total 1 block */ /* Up until this point we require blks_need blocks. We don't * include the blocks freed by the next step (remove_obsolete_gfs1) * because it's possible for us to exceed the available blocks * before this step */ return blks_avail > blks_need; } /* We fetch the old quota inode block and copy the contents of the block * (minus the struct gfs2_dinode) into the new quota block. We update the * inode height/size of the new quota file to that of the old one and set the * old quota inode height/size to zero, so only the inode block gets freed. */ static void copy_quotas(struct gfs2_sbd *sdp) { struct gfs2_inum inum; struct gfs2_inode *oq_ip, *nq_ip; int err; err = gfs2_lookupi(sdp->master_dir, "quota", 5, &nq_ip); if (err) { fprintf(stderr, _("Couldn't lookup new quota file: %d\n"), err); exit(1); } gfs2_inum_in(&inum, (char *)&raw_gfs1_ondisk_sb.sb_quota_di); oq_ip = lgfs2_inode_read(sdp, inum.no_addr); if (oq_ip == NULL) { fprintf(stderr, _("Couldn't lookup old quota file: %s\n"), strerror(errno)); exit(1); } nq_ip->i_di.di_height = oq_ip->i_di.di_height; nq_ip->i_di.di_size = oq_ip->i_di.di_size; nq_ip->i_di.di_blocks = oq_ip->i_di.di_blocks; memcpy(nq_ip->i_bh->b_data + sizeof(struct gfs2_dinode), oq_ip->i_bh->b_data + sizeof(struct gfs2_dinode), sdp->bsize - sizeof(struct gfs2_dinode)); oq_ip->i_di.di_height = 0; oq_ip->i_di.di_size = 0; bmodified(nq_ip->i_bh); inode_put(&nq_ip); bmodified(oq_ip->i_bh); inode_put(&oq_ip); } static int gfs2_query(struct gfs2_options *opts, const char *dev) { char response[3] = { 0, 0 }; int ret = 0; if(opts->yes) return 1; if(opts->no) return 0; opts->query = TRUE; while (1) { printf(_("Convert %s from GFS1 to GFS2? 
(y/n)"), dev); /* Make sure query is printed out */ fflush(NULL); fgets(response, 3, stdin); printf("\n"); fflush(NULL); response[1] = 0; ret = rpmatch(response); if (ret >= 0) break; printf(_("Bad response '%s', please type 'y' or 'n'.\n"), response); } opts->query = FALSE; return ret; } /* ------------------------------------------------------------------------- */ /* main - mainline code */ /* ------------------------------------------------------------------------- */ int main(int argc, char **argv) { int error; struct gfs2_buffer_head *bh; struct gfs2_options opts; /* Set i18n support to gfs2_convert */ setlocale(LC_ALL, ""); textdomain("gfs2-utils"); version(); process_parameters(argc, argv, &opts); error = init(&sb2); /* ---------------------------------------------- */ /* Make them seal their fate. */ /* ---------------------------------------------- */ if (!error) { give_warning(); if (!gfs2_query(&opts, device)) { log_crit(_("%s not converted.\n"), device); close(sb2.device_fd); exit(0); } } /* ---------------------------------------------- */ /* Convert incore gfs1 sb to gfs2 sb */ /* ---------------------------------------------- */ if (!error) { log_notice(_("Converting resource groups.")); fflush(stdout); error = convert_rgs(&sb2); log_notice("\n"); if (error) log_crit(_("%s: Unable to convert resource groups.\n"), device); fsync(sb2.device_fd); /* write the buffers to disk */ } /* ---------------------------------------------- */ /* Renumber the inodes consecutively. */ /* ---------------------------------------------- */ if (!error) { /* Add a string notifying inode converstion start? */ error = inode_renumber(&sb2, sb2.sd_sb.sb_root_dir.no_addr, (osi_list_t *)&cdpns_to_fix); if (error) log_crit(_("\n%s: Error renumbering inodes.\n"), device); fsync(sb2.device_fd); /* write the buffers to disk */ } /* ---------------------------------------------- */ /* Fix the directories to match the new numbers. 
*/ /* ---------------------------------------------- */ if (!error) { error = fix_directory_info(&sb2, (osi_list_t *)&dirs_to_fix); log_notice(_("\r%llu directories, %llu dirents fixed."), (unsigned long long)dirs_fixed, (unsigned long long)dirents_fixed); fflush(stdout); if (error) log_crit(_("\n%s: Error fixing directories.\n"), device); } /* ---------------------------------------------- */ /* Convert cdpn symlinks to empty dirs */ /* ---------------------------------------------- */ if (!error) { error = fix_cdpn_symlinks(&sb2, (osi_list_t *)&cdpns_to_fix); log_notice(_("\r%llu cdpn symlinks moved to empty directories."), (unsigned long long)cdpns_fixed); fflush(stdout); if (error) log_crit(_("\n%s: Error fixing cdpn symlinks.\n"), device); } /* ---------------------------------------------- */ /* Convert journal space to rg space */ /* ---------------------------------------------- */ if (!error) { log_notice(_("\nConverting journals.\n")); error = journ_space_to_rg(&sb2); if (error) log_crit(_("%s: Error converting journal space.\n"), device); fsync(sb2.device_fd); /* write the buffers to disk */ } /* ---------------------------------------------- */ /* Create our system files and directories. */ /* ---------------------------------------------- */ if (!error) { int jreduce = 0; /* Now we've got to treat it as a gfs2 file system */ if (compute_constants(&sb2)) { log_crit(_("Error: Bad constants (1)\n")); exit(-1); } /* Check if all the files we're about to create will * fit into the space remaining on the device */ while (!check_fit(&sb2)) { sb2.jsize--; /* reduce jsize by 1MB each time */ jreduce = 1; } if (jreduce) log_notice(_("Reduced journal size to %u MB to accommodate " "GFS2 file system structures.\n"), sb2.jsize); /* Build the master subdirectory. */ build_master(&sb2); /* Does not do inode_put */ sb2.sd_sb.sb_master_dir = sb2.master_dir->i_di.di_num; /* Build empty journal index file. 
*/ error = conv_build_jindex(&sb2); if (error) { log_crit(_("Error: could not build jindex: %s\n"), strerror(error)); exit(-1); } log_notice(_("Building GFS2 file system structures.\n")); /* Build the per-node directories */ error = build_per_node(&sb2); if (error) { log_crit(_("Error building per-node directories: %s\n"), strerror(error)); exit(-1); } /* Create the empty inode number file */ error = build_inum(&sb2); /* Does not do inode_put */ if (error) { log_crit(_("Error building inum inode: %s\n"), strerror(error)); exit(-1); } gfs2_lookupi(sb2.master_dir, "inum", 4, &sb2.md.inum); /* Create the statfs file */ error = build_statfs(&sb2); /* Does not do inode_put */ if (error) { log_crit(_("Error building statfs inode: %s\n"), strerror(error)); exit(-1); } gfs2_lookupi(sb2.master_dir, "statfs", 6, &sb2.md.statfs); do_init_statfs(&sb2); /* Create the resource group index file */ error = build_rindex(&sb2); if (error) { log_crit(_("Error building rindex inode: %s\n"), strerror(error)); exit(-1); } /* Create the quota file */ error = build_quota(&sb2); if (error) { log_crit(_("Error building quota inode: %s\n"), strerror(error)); exit(-1); } /* Copy out the master dinode */ { struct gfs2_inode *ip = sb2.master_dir; if (ip->i_bh->b_modified) gfs2_dinode_out(&ip->i_di, ip->i_bh); } /* Copy old quotas */ copy_quotas(&sb2); update_inode_file(&sb2); /* Now delete the now-obsolete gfs1 files: */ remove_obsolete_gfs1(&sb2); write_statfs_file(&sb2); inode_put(&sb2.master_dir); inode_put(&sb2.md.inum); inode_put(&sb2.md.statfs); fsync(sb2.device_fd); /* write the buffers to disk */ /* Now free all the in memory */ gfs2_rgrp_free(&sb2.rgtree); log_notice(_("Committing changes to disk.\n")); fflush(stdout); /* Set filesystem type in superblock to gfs2. We do this at the */ /* end because if the tool is interrupted in the middle, we want */ /* it to not reject the partially converted fs as already done */ /* when it's run a second time. 
*/ bh = bread(&sb2, sb2.sb_addr); sb2.sd_sb.sb_fs_format = GFS2_FORMAT_FS; sb2.sd_sb.sb_multihost_format = GFS2_FORMAT_MULTI; gfs2_sb_out(&sb2.sd_sb, bh); brelse(bh); error = fsync(sb2.device_fd); if (error) perror(device); else log_notice(_("%s: filesystem converted successfully to gfs2.\n"), device); } close(sb2.device_fd); if (sd_jindex) free(sd_jindex); exit(0); } gfs2-utils/gfs2/convert/target.mk0000664000175000017500000000012112110647577015673 0ustar andyandy $(eval $(call make-trans-binary,/usr/sbin/gfs2_convert,gfs2/libgfs2/libgfs2.a)) gfs2-utils/gfs2/edit/Makefile.am0000664000175000017500000000073112110647577015364 0ustar andyandyMAINTAINERCLEANFILES = Makefile.in sbin_PROGRAMS = gfs2_edit noinst_HEADERS = gfs2hex.h hexedit.h extended.h gfs2_edit_SOURCES = gfs2hex.c hexedit.c savemeta.c extended.c gfs2_edit_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -DHELPER_PROGRAM \ -I$(top_srcdir)/gfs2/include \ -I$(top_srcdir)/gfs2/libgfs2 gfs2_edit_CFLAGS = $(ncurses_CFLAGS) $(zlib_CFLAGS) gfs2_edit_LDFLAGS = $(ncurses_LIBS) $(zlib_LIBS) gfs2_edit_LDADD = $(top_builddir)/gfs2/libgfs2/libgfs2.la gfs2-utils/gfs2/edit/extended.c0000664000175000017500000004072712154127655015303 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "copyright.cf" #include "hexedit.h" #include "libgfs2.h" #include "extended.h" #include "gfs2hex.h" static void print_block_details(struct iinfo *ind, int level, int cur_height, int pndx, uint64_t file_offset); static int get_height(void) { int cur_height = 0, i; if (gfs2_struct_type != GFS2_METATYPE_DI) { for (i = 0; i <= blockhist && i < 5; i++) { if (blockstack[(blockhist - i) % BLOCK_STACK_SIZE].gfs2_struct_type == GFS2_METATYPE_DI) break; cur_height++; } } return cur_height; } static int _do_indirect_extended(char *diebuf, struct iinfo *iinf, int hgt) { unsigned int x, y; uint64_t 
p; int i_blocks; i_blocks = 0; for (x = 0; x < 512; x++) { iinf->ii[x].is_dir = 0; iinf->ii[x].height = 0; iinf->ii[x].block = 0; iinf->ii[x].dirents = 0; memset(&iinf->ii[x].dirent, 0, sizeof(struct gfs2_dirents)); } for (x = (sbd.gfs1 ? sizeof(struct gfs_indirect): sizeof(struct gfs2_meta_header)), y = 0; x < sbd.bsize; x += sizeof(uint64_t), y++) { p = be64_to_cpu(*(uint64_t *)(diebuf + x)); if (p) { iinf->ii[i_blocks].block = p; iinf->ii[i_blocks].mp.mp_list[hgt] = i_blocks; iinf->ii[i_blocks].is_dir = FALSE; i_blocks++; } } return i_blocks; } int do_indirect_extended(char *diebuf, struct iinfo *iinf) { return _do_indirect_extended(diebuf, iinf, get_height()); } /* ------------------------------------------------------------------------ */ /* dinode_valid - check if we have a dinode in recent history */ /* ------------------------------------------------------------------------ */ static int dinode_valid(void) { int i; if (gfs2_struct_type == GFS2_METATYPE_DI) return 1; for (i = 0; i <= blockhist && i < 5; i++) { if (blockstack[(blockhist - i) % BLOCK_STACK_SIZE].gfs2_struct_type == GFS2_METATYPE_DI) return 1; } return 0; } static uint64_t metapath_to_lblock(struct metapath *mp, int hgt) { int h; uint64_t lblock = 0; uint64_t factor[GFS2_MAX_META_HEIGHT]; if (di.di_height < 2) return mp->mp_list[0]; /* figure out multiplication factors for each height */ memset(&factor, 0, sizeof(factor)); factor[di.di_height - 1] = 1ull; for (h = di.di_height - 2; h >= 0; h--) factor[h] = factor[h + 1] * sbd.sd_inptrs; for (h = 0; h <= hgt; h++) lblock += (mp->mp_list[h] * factor[h]); return lblock; } static int display_indirect(struct iinfo *ind, int indblocks, int level, uint64_t startoff) { int start_line; int cur_height = -1, pndx; last_entry_onscreen[dmode] = 0; if (!has_indirect_blocks()) return -1; if (!level) { if (gfs2_struct_type == GFS2_METATYPE_DI) { if (S_ISDIR(di.di_mode)) print_gfs2("This directory contains %d indirect blocks", indblocks); else print_gfs2("This 
inode contains %d indirect blocks", indblocks); } else print_gfs2("This indirect block contains %d indirect blocks", indblocks); } if (dinode_valid() && !S_ISDIR(di.di_mode)) { /* See if we are on an inode or have one in history. */ if (level) cur_height = level; else { cur_height = get_height(); print_gfs2(" (at height %d of %d)", cur_height, di.di_height); } } eol(0); if (!level && indblocks) { print_gfs2("Indirect blocks:"); eol(0); } start_line = line; for (pndx = start_row[dmode]; (!termlines || pndx < termlines - start_line - 1 + start_row[dmode]) && pndx < indblocks; pndx++) { uint64_t file_offset; if (pndx && ind->ii[pndx].block == ind->ii[pndx - 1].block) continue; print_entry_ndx = pndx; if (termlines) { if (edit_row[dmode] >= 0 && line - start_line == edit_row[dmode] - start_row[dmode]) COLORS_HIGHLIGHT; move(line, 1); } if (!termlines) { int h; for (h = 0; h < level; h++) print_gfs2(" "); } print_gfs2("%d => ", pndx); if (termlines) move(line,9); print_gfs2("0x%llx / %lld", ind->ii[pndx].block, ind->ii[pndx].block); if (termlines) { if (edit_row[dmode] >= 0 && line - start_line == edit_row[dmode] - start_row[dmode]) { sprintf(estring, "%llx", (unsigned long long)ind->ii[print_entry_ndx].block); strcpy(edit_fmt, "%llx"); edit_size[dmode] = strlen(estring); COLORS_NORMAL; } } if (dinode_valid() && !S_ISDIR(di.di_mode)) { float human_off; char h; file_offset = metapath_to_lblock(&ind->ii[pndx].mp, cur_height) * sbd.bsize; print_gfs2(" "); h = 'K'; human_off = (file_offset / 1024.0); if (human_off > 1024.0) { h = 'M'; human_off /= 1024.0; } if (human_off > 1024.0) { h = 'G'; human_off /= 1024.0; } if (human_off > 1024.0) { h = 'T'; human_off /= 1024.0; } if (human_off > 1024.0) { h = 'P'; human_off /= 1024.0; } if (human_off > 1024.0) { h = 'E'; human_off /= 1024.0; } print_gfs2("(data offset 0x%llx / %lld / %6.2f%c)", file_offset, file_offset, human_off, h); print_gfs2(" "); } else file_offset = 0; if (dinode_valid() && !termlines && ((level + 1 < 
di.di_height) || (S_ISDIR(di.di_mode) && level <= di.di_height))) { print_block_details(ind, level, cur_height, pndx, file_offset); } print_entry_ndx = pndx; /* restore after recursion */ eol(0); } /* for each display row */ if (line >= 7) /* 7 because it was bumped at the end */ last_entry_onscreen[dmode] = line - 7; eol(0); end_row[dmode] = indblocks; if (end_row[dmode] < last_entry_onscreen[dmode]) end_row[dmode] = last_entry_onscreen[dmode]; lines_per_row[dmode] = 1; return 0; } static void print_inode_type(__be16 de_type) { if (sbd.gfs1) { switch(de_type) { case GFS_FILE_NON: print_gfs2("Unknown"); break; case GFS_FILE_REG: print_gfs2("File "); break; case GFS_FILE_DIR: print_gfs2("Dir "); break; case GFS_FILE_LNK: print_gfs2("Symlink"); break; case GFS_FILE_BLK: print_gfs2("BlkDev "); break; case GFS_FILE_CHR: print_gfs2("ChrDev "); break; case GFS_FILE_FIFO: print_gfs2("Fifo "); break; case GFS_FILE_SOCK: print_gfs2("Socket "); break; default: print_gfs2("%04x ", de_type); break; } return; } switch(de_type) { case DT_UNKNOWN: print_gfs2("Unknown"); break; case DT_REG: print_gfs2("File "); break; case DT_DIR: print_gfs2("Dir "); break; case DT_LNK: print_gfs2("Symlink"); break; case DT_BLK: print_gfs2("BlkDev "); break; case DT_CHR: print_gfs2("ChrDev "); break; case DT_FIFO: print_gfs2("Fifo "); break; case DT_SOCK: print_gfs2("Socket "); break; default: print_gfs2("%04x ", de_type); break; } } static int display_leaf(struct iinfo *ind) { int start_line, total_dirents = start_row[dmode]; int d; eol(0); if (gfs2_struct_type == GFS2_METATYPE_SB) print_gfs2("The superblock has 2 directories"); else print_gfs2("Directory block: lf_depth:%d, lf_entries:%d," "fmt:%d next=0x%llx (%d dirents).", ind->ii[0].lf_depth, ind->ii[0].lf_entries, ind->ii[0].lf_dirent_format, ind->ii[0].lf_next, ind->ii[0].dirents); start_line = line; for (d = start_row[dmode]; d < ind->ii[0].dirents; d++) { if (termlines && d >= termlines - start_line - 2 + start_row[dmode]) break; 
total_dirents++; if (ind->ii[0].dirents >= 1) { eol(3); if (termlines) { if (edit_row[dmode] >=0 && line - start_line - 1 == edit_row[dmode] - start_row[dmode]) { COLORS_HIGHLIGHT; sprintf(estring, "%llx", (unsigned long long)ind->ii[0].dirent[d].block); strcpy(edit_fmt, "%llx"); } } print_gfs2("%d/%d [%08x] %lld/%lld (0x%llx/0x%llx): ", total_dirents, d + 1, ind->ii[0].dirent[d].dirent.de_hash, ind->ii[0].dirent[d].dirent.de_inum.no_formal_ino, ind->ii[0].dirent[d].block, ind->ii[0].dirent[d].dirent.de_inum.no_formal_ino, ind->ii[0].dirent[d].block); } print_inode_type(ind->ii[0].dirent[d].dirent.de_type); print_gfs2(" %s", ind->ii[0].dirent[d].filename); if (termlines) { if (edit_row[dmode] >= 0 && line - start_line - 1 == edit_row[dmode] - start_row[dmode]) COLORS_NORMAL; } } if (line >= 4) last_entry_onscreen[dmode] = line - 4; eol(0); end_row[dmode] = ind->ii[0].dirents; if (end_row[dmode] < last_entry_onscreen[dmode]) end_row[dmode] = last_entry_onscreen[dmode]; return 0; } static void print_block_details(struct iinfo *ind, int level, int cur_height, int pndx, uint64_t file_offset) { struct iinfo *more_indir; int more_ind; char *tmpbuf; uint64_t thisblk; thisblk = ind->ii[pndx].block; more_indir = malloc(sizeof(struct iinfo)); if (!more_indir) { fprintf(stderr, "Out of memory in function " "display_indirect\n"); return; } tmpbuf = malloc(sbd.bsize); if (!tmpbuf) { fprintf(stderr, "Out of memory in function " "display_indirect\n"); free(more_indir); return; } while (thisblk) { lseek(sbd.device_fd, thisblk * sbd.bsize, SEEK_SET); /* read in the desired block */ if (read(sbd.device_fd, tmpbuf, sbd.bsize) != sbd.bsize) { fprintf(stderr, "bad read: %s from %s:%d: block %lld " "(0x%llx)\n", strerror(errno), __FUNCTION__, __LINE__, (unsigned long long)ind->ii[pndx].block, (unsigned long long)ind->ii[pndx].block); exit(-1); } thisblk = 0; memset(more_indir, 0, sizeof(struct iinfo)); if (S_ISDIR(di.di_mode) && level == di.di_height) { thisblk = 
do_leaf_extended(tmpbuf, more_indir); display_leaf(more_indir); } else { int x; for (x = 0; x < 512; x++) { memcpy(&more_indir->ii[x].mp, &ind->ii[pndx].mp, sizeof(struct metapath)); more_indir->ii[x].mp.mp_list[cur_height+1] = x; } more_ind = _do_indirect_extended(tmpbuf, more_indir, cur_height + 1); display_indirect(more_indir, more_ind, level + 1, file_offset); } if (thisblk) { eol(0); if (termlines) move(line,9); print_gfs2("Continuation block 0x%llx / %lld", thisblk, thisblk); } } free(tmpbuf); free(more_indir); } static void gfs_jindex_print(struct gfs_jindex *ji) { pv((unsigned long long)ji, ji_addr, "%llu", "0x%llx"); pv(ji, ji_nsegment, "%u", "0x%x"); pv(ji, ji_pad, "%u", "0x%x"); } static int print_jindex(struct gfs2_inode *dij) { int error, start_line; struct gfs_jindex ji; char jbuf[sizeof(struct gfs_jindex)]; start_line = line; error = 0; print_gfs2("Journal index entries found: %d.", dij->i_di.di_size / sizeof(struct gfs_jindex)); eol(0); lines_per_row[dmode] = 4; for (print_entry_ndx=0; ; print_entry_ndx++) { error = gfs2_readi(dij, (void *)&jbuf, print_entry_ndx*sizeof(struct gfs_jindex), sizeof(struct gfs_jindex)); gfs_jindex_in(&ji, jbuf); if (!error) /* end of file */ break; if (!termlines || (print_entry_ndx >= start_row[dmode] && ((print_entry_ndx - start_row[dmode])+1) * lines_per_row[dmode] <= termlines - start_line - 2)) { if (edit_row[dmode] == print_entry_ndx) { COLORS_HIGHLIGHT; strcpy(efield, "ji_addr"); sprintf(estring, "%llx", (unsigned long long)ji.ji_addr); } print_gfs2("Journal #%d", print_entry_ndx); eol(0); if (edit_row[dmode] == print_entry_ndx) COLORS_NORMAL; gfs_jindex_print(&ji); last_entry_onscreen[dmode] = print_entry_ndx; } } end_row[dmode] = print_entry_ndx; return error; } static int parse_rindex(struct gfs2_inode *dip, int print_rindex) { int error, start_line; struct gfs2_rindex ri; char rbuf[sizeof(struct gfs2_rindex)]; char highlighted_addr[32]; start_line = line; error = 0; print_gfs2("RG index entries found: %d.", 
dip->i_di.di_size / sizeof(struct gfs2_rindex)); eol(0); lines_per_row[dmode] = 6; memset(highlighted_addr, 0, sizeof(highlighted_addr)); for (print_entry_ndx=0; ; print_entry_ndx++) { uint64_t roff; roff = print_entry_ndx * sizeof(struct gfs2_rindex); error = gfs2_readi(dip, (void *)&rbuf, roff, sizeof(struct gfs2_rindex)); if (!error) /* end of file */ break; gfs2_rindex_in(&ri, rbuf); if (!termlines || (print_entry_ndx >= start_row[dmode] && ((print_entry_ndx - start_row[dmode])+1) * lines_per_row[dmode] <= termlines - start_line - 2)) { if (edit_row[dmode] == print_entry_ndx) { COLORS_HIGHLIGHT; sprintf(highlighted_addr, "%llx", (unsigned long long)ri.ri_addr); } print_gfs2("RG #%d", print_entry_ndx); if (!print_rindex) print_gfs2(" located at: %llu (0x%llx)", ri.ri_addr, ri.ri_addr); eol(0); if (edit_row[dmode] == print_entry_ndx) COLORS_NORMAL; if(print_rindex) gfs2_rindex_print(&ri); else { struct gfs2_buffer_head *tmp_bh; tmp_bh = bread(&sbd, ri.ri_addr); if (sbd.gfs1) { struct gfs_rgrp rg1; gfs_rgrp_in(&rg1, tmp_bh); gfs_rgrp_print(&rg1); } else { struct gfs2_rgrp rg; gfs2_rgrp_in(&rg, tmp_bh); gfs2_rgrp_print(&rg); } brelse(tmp_bh); } last_entry_onscreen[dmode] = print_entry_ndx; } } strcpy(estring, highlighted_addr); end_row[dmode] = print_entry_ndx; return error; } static int print_inum(struct gfs2_inode *dii) { uint64_t inum, inodenum; int rc; rc = gfs2_readi(dii, (void *)&inum, 0, sizeof(inum)); if (!rc) { print_gfs2("The inum file is empty."); eol(0); return 0; } if (rc != sizeof(inum)) { print_gfs2("Error reading inum file."); eol(0); return -1; } inodenum = be64_to_cpu(inum); print_gfs2("Next inode num = %lld (0x%llx)", inodenum, inodenum); eol(0); return 0; } static int print_statfs(struct gfs2_inode *dis) { struct gfs2_statfs_change sfb, sfc; int rc; rc = gfs2_readi(dis, (void *)&sfb, 0, sizeof(sfb)); if (!rc) { print_gfs2("The statfs file is empty."); eol(0); return 0; } if (rc != sizeof(sfb)) { print_gfs2("Error reading statfs file."); eol(0); 
return -1; } gfs2_statfs_change_in(&sfc, (char *)&sfb); print_gfs2("statfs file contents:"); eol(0); gfs2_statfs_change_print(&sfc); return 0; } static int print_quota(struct gfs2_inode *diq) { struct gfs2_quota qbuf, q; int i, error; print_gfs2("quota file contents:"); eol(0); print_gfs2("quota entries found: %d.", diq->i_di.di_size / sizeof(q)); eol(0); for (i=0; ; i++) { error = gfs2_readi(diq, (void *)&qbuf, i * sizeof(q), sizeof(qbuf)); if (!error) break; if (error != sizeof(qbuf)) { print_gfs2("Error reading quota file."); eol(0); return -1; } gfs2_quota_in(&q, (char *)&qbuf); print_gfs2("Entry #%d", i + 1); eol(0); gfs2_quota_print(&q); } return 0; } int display_extended(void) { struct gfs2_inode *tmp_inode; struct gfs2_buffer_head *tmp_bh; dsplines = termlines - line - 1; /* Display any indirect pointers that we have. */ if (block_is_rindex()) { tmp_bh = bread(&sbd, block); tmp_inode = lgfs2_inode_get(&sbd, tmp_bh); if (tmp_inode == NULL) return -1; parse_rindex(tmp_inode, TRUE); inode_put(&tmp_inode); brelse(tmp_bh); } else if (has_indirect_blocks() && !indirect_blocks && !display_leaf(indirect)) return -1; else if (display_indirect(indirect, indirect_blocks, 0, 0) == 0) return -1; else if (block_is_rgtree()) { if (sbd.gfs1) tmp_bh = bread(&sbd, sbd1->sb_rindex_di.no_addr); else tmp_bh = bread(&sbd, masterblock("rindex")); tmp_inode = lgfs2_inode_get(&sbd, tmp_bh); if (tmp_inode == NULL) return -1; parse_rindex(tmp_inode, FALSE); inode_put(&tmp_inode); brelse(tmp_bh); } else if (block_is_jindex()) { tmp_bh = bread(&sbd, block); tmp_inode = lgfs2_inode_get(&sbd, tmp_bh); if (tmp_inode == NULL) return -1; print_jindex(tmp_inode); inode_put(&tmp_inode); brelse(tmp_bh); } else if (block_is_inum_file()) { tmp_bh = bread(&sbd, block); tmp_inode = lgfs2_inode_get(&sbd, tmp_bh); if (tmp_inode == NULL) return -1; print_inum(tmp_inode); inode_put(&tmp_inode); brelse(tmp_bh); } else if (block_is_statfs_file()) { tmp_bh = bread(&sbd, block); tmp_inode = 
lgfs2_inode_get(&sbd, tmp_bh); if (tmp_inode == NULL) return -1; print_statfs(tmp_inode); inode_put(&tmp_inode); brelse(tmp_bh); } else if (block_is_quota_file()) { tmp_bh = bread(&sbd, block); tmp_inode = lgfs2_inode_get(&sbd, tmp_bh); if (tmp_inode == NULL) return -1; print_quota(tmp_inode); inode_put(&tmp_inode); brelse(tmp_bh); } return 0; } gfs2-utils/gfs2/edit/extended.h0000664000175000017500000000024612110647577015302 0ustar andyandy#ifndef __EXTENDED_DOT_H__ #define __EXTENDED_DOT_H__ extern int do_indirect_extended(char *diebuf, struct iinfo *iinf); extern int display_extended(void); #endif gfs2-utils/gfs2/edit/gfs2hex.c0000664000175000017500000003404312173233423015033 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include "hexedit.h" #define WANT_GFS_CONVERSION_FUNCTIONS #include #include "extended.h" #include "gfs2hex.h" #include "libgfs2.h" #define pv(struct, member, fmt, fmt2) do { \ print_it(" "#member, fmt, fmt2, struct->member); \ } while (FALSE); #define pv2(struct, member, fmt, fmt2) do { \ print_it(" ", fmt, fmt2, struct->member); \ } while (FALSE); struct gfs2_sb sb; struct gfs2_buffer_head *bh; struct gfs2_dinode di; int line, termlines, modelines[DMODES]; char edit_fmt[80]; char estring[1024]; char efield[64]; int edit_mode = 0; int edit_row[DMODES], edit_col[DMODES]; int edit_size[DMODES], last_entry_onscreen[DMODES]; char edit_fmt[80]; enum dsp_mode dmode = HEX_MODE; /* display mode */ uint64_t block = 0; int blockhist = 0; struct iinfo *indirect; int indirect_blocks; uint64_t block_in_mem = -1; struct gfs2_sbd sbd; uint64_t starting_blk; struct blkstack_info blockstack[BLOCK_STACK_SIZE]; int identify = FALSE; char device[NAME_MAX]; uint64_t max_block = 0; int start_row[DMODES], end_row[DMODES], lines_per_row[DMODES]; struct gfs_sb *sbd1; int gfs2_struct_type; unsigned int offset; struct indirect_info masterdir; struct gfs2_inum gfs1_quota_di; 
int print_entry_ndx; struct gfs2_inum gfs1_license_di; int screen_chunk_size = 512; uint64_t temp_blk; int color_scheme = 0; int struct_len; uint64_t dev_offset = 0; int editing = 0; int insert = 0; const char *termtype; WINDOW *wind; int dsplines = 0; const char *block_type_str[15] = { "Clump", "Superblock", "Resource Group Header", "Resource Group Bitmap", "Dinode", "Indirect Block", "Leaf", "Journaled Data", "Log Header", "Log descriptor", "Ext. attrib", "Eattr Data", "Log Buffer", "Metatype 13", "Quota Change", }; struct gfs1_rgrp { struct gfs2_meta_header rg_header; uint32_t rg_flags; /* ?? */ uint32_t rg_free; /* Number (qty) of free data blocks */ /* Dinodes are USEDMETA, but are handled separately from other METAs */ uint32_t rg_useddi; /* Number (qty) of dinodes (used or free) */ uint32_t rg_freedi; /* Number (qty) of unused (free) dinodes */ struct gfs2_inum rg_freedi_list; /* 1st block in chain of free dinodes */ /* These META statistics do not include dinodes (used or free) */ uint32_t rg_usedmeta; /* Number (qty) of used metadata blocks */ uint32_t rg_freemeta; /* Number (qty) of unused metadata blocks */ char rg_reserved[64]; }; void eol(int col) /* end of line */ { if (termlines) { line++; move(line, col); } else { printf("\n"); for (; col > 0; col--) printf(" "); } } void __attribute__((format (printf, 1, 2))) print_gfs2(const char *fmt, ...) 
{ va_list args; char string[PATH_MAX]; memset(string, 0, sizeof(string)); va_start(args, fmt); vsprintf(string, fmt, args); if (termlines) printw("%s", string); else printf("%s", string); va_end(args); } static void check_highlight(int highlight) { if (!termlines || line >= termlines) /* If printing or out of bounds */ return; if (dmode == HEX_MODE) { if (line == (edit_row[dmode] * lines_per_row[dmode]) + 4) { if (highlight) { COLORS_HIGHLIGHT; last_entry_onscreen[dmode] = print_entry_ndx; } else COLORS_NORMAL; } } else { if ((line * lines_per_row[dmode]) - 4 == (edit_row[dmode] - start_row[dmode]) * lines_per_row[dmode]) { if (highlight) { COLORS_HIGHLIGHT; last_entry_onscreen[dmode] = print_entry_ndx; } else COLORS_NORMAL; } } } void print_it(const char *label, const char *fmt, const char *fmt2, ...) { va_list args; char tmp_string[NAME_MAX]; const char *fmtstring; int decimalsize; if (!termlines || line < termlines) { va_start(args, fmt2); check_highlight(TRUE); if (termlines) { move(line,0); printw("%s", label); move(line,24); } else { if (!strcmp(label, " ")) printf("%-11s", label); else printf("%-24s", label); } vsprintf(tmp_string, fmt, args); if (termlines) printw("%s", tmp_string); else printf("%s", tmp_string); check_highlight(FALSE); if (fmt2) { decimalsize = strlen(tmp_string); va_end(args); va_start(args, fmt2); vsprintf(tmp_string, fmt2, args); check_highlight(TRUE); if (termlines) { move(line, 50); printw("%s", tmp_string); } else { int i; for (i=20 - decimalsize; i > 0; i--) printf(" "); printf("%s", tmp_string); } check_highlight(FALSE); } else { if (strstr(fmt,"X") || strstr(fmt,"x")) fmtstring="(hex)"; else if (strstr(fmt,"s")) fmtstring=""; else fmtstring="(decimal)"; if (termlines) { move(line, 50); printw("%s", fmtstring); } else printf("%s", fmtstring); } if (termlines) { refresh(); if (line == (edit_row[dmode] * lines_per_row[dmode]) + 4) { strcpy(efield, label + 2); /* it's indented */ strcpy(estring, tmp_string); strcpy(edit_fmt, fmt); 
edit_size[dmode] = strlen(estring); COLORS_NORMAL; } last_entry_onscreen[dmode] = (line / lines_per_row[dmode]) - 4; } eol(0); va_end(args); } } static int indirect_dirent(struct indirect_info *indir, char *ptr, int d) { struct gfs2_dirent de; gfs2_dirent_in(&de, ptr); if (de.de_rec_len < sizeof(struct gfs2_dirent) || de.de_rec_len > 4096 - sizeof(struct gfs2_dirent)) return -1; if (de.de_inum.no_addr) { indir->block = de.de_inum.no_addr; memcpy(&indir->dirent[d].dirent, &de, sizeof(struct gfs2_dirent)); memcpy(&indir->dirent[d].filename, ptr + sizeof(struct gfs2_dirent), de.de_name_len); indir->dirent[d].filename[de.de_name_len] = '\0'; indir->dirent[d].block = de.de_inum.no_addr; indir->is_dir = TRUE; indir->dirents++; } return de.de_rec_len; } void do_dinode_extended(struct gfs2_dinode *dine, struct gfs2_buffer_head *lbh) { unsigned int x, y, ptroff = 0; uint64_t p, last; int isdir = !!(S_ISDIR(dine->di_mode)) || (sbd.gfs1 && dine->__pad1 == GFS_FILE_DIR); indirect_blocks = 0; memset(indirect, 0, sizeof(struct iinfo)); if (dine->di_height > 0) { /* Indirect pointers */ for (x = sizeof(struct gfs2_dinode); x < sbd.bsize; x += sizeof(uint64_t)) { p = be64_to_cpu(*(uint64_t *)(lbh->b_data + x)); if (p) { indirect->ii[indirect_blocks].block = p; indirect->ii[indirect_blocks].mp.mp_list[0] = ptroff; indirect->ii[indirect_blocks].is_dir = FALSE; indirect_blocks++; } ptroff++; } } else if (isdir && !(dine->di_flags & GFS2_DIF_EXHASH)) { int skip = 0; /* Directory Entries: */ indirect->ii[0].dirents = 0; indirect->ii[0].block = block; indirect->ii[0].is_dir = TRUE; for (x = sizeof(struct gfs2_dinode); x < sbd.bsize; x += skip) { skip = indirect_dirent(indirect->ii, lbh->b_data + x, indirect->ii[0].dirents); if (skip <= 0) break; } } else if (isdir && (dine->di_flags & GFS2_DIF_EXHASH) && dine->di_height == 0) { /* Leaf Pointers: */ last = be64_to_cpu(*(uint64_t *)(lbh->b_data + sizeof(struct gfs2_dinode))); for (x = sizeof(struct gfs2_dinode), y = 0; y < (1 << 
dine->di_depth); x += sizeof(uint64_t), y++) { p = be64_to_cpu(*(uint64_t *)(lbh->b_data + x)); if (p != last || ((y + 1) * sizeof(uint64_t) == dine->di_size)) { struct gfs2_buffer_head *tmp_bh; int skip = 0, direntcount = 0; struct gfs2_leaf leaf; unsigned int bufoffset; if (last >= max_block) break; tmp_bh = bread(&sbd, last); gfs2_leaf_in(&leaf, tmp_bh); indirect->ii[indirect_blocks].dirents = 0; for (direntcount = 0, bufoffset = sizeof(struct gfs2_leaf); bufoffset < sbd.bsize; direntcount++, bufoffset += skip) { skip = indirect_dirent(&indirect->ii[indirect_blocks], tmp_bh->b_data + bufoffset, direntcount); if (skip <= 0) break; } brelse(tmp_bh); indirect->ii[indirect_blocks].block = last; indirect_blocks++; last = p; } /* if not duplicate pointer */ } /* for indirect pointers found */ } /* if exhash */ }/* do_dinode_extended */ /** * Returns: next leaf block, if any, in a chain of leaf blocks */ uint64_t do_leaf_extended(char *dlebuf, struct iinfo *indir) { int x, i; struct gfs2_dirent de; struct gfs2_leaf leaf; struct gfs2_buffer_head tbh; /* kludge */ x = 0; memset(indir, 0, sizeof(*indir)); tbh.b_data = dlebuf; gfs2_leaf_in(&leaf, &tbh); indir->ii[0].lf_depth = leaf.lf_depth; indir->ii[0].lf_entries = leaf.lf_entries; indir->ii[0].lf_dirent_format = leaf.lf_dirent_format; indir->ii[0].lf_next = leaf.lf_next; /* Directory Entries: */ for (i = sizeof(struct gfs2_leaf); i < sbd.bsize; i += de.de_rec_len) { gfs2_dirent_in(&de, dlebuf + i); if (de.de_inum.no_addr) { indir->ii[0].block = de.de_inum.no_addr; indir->ii[0].dirent[x].block = de.de_inum.no_addr; memcpy(&indir->ii[0].dirent[x].dirent, &de, sizeof(struct gfs2_dirent)); memcpy(&indir->ii[0].dirent[x].filename, dlebuf + i + sizeof(struct gfs2_dirent), de.de_name_len); indir->ii[0].dirent[x].filename[de.de_name_len] = '\0'; indir->ii[0].is_dir = TRUE; indir->ii[0].dirents++; x++; } if (de.de_rec_len <= sizeof(struct gfs2_dirent)) break; } return leaf.lf_next; } static void do_eattr_extended(struct 
gfs2_buffer_head *ebh) { struct gfs2_ea_header ea; unsigned int x; eol(0); print_gfs2("Eattr Entries:"); eol(0); for (x = sizeof(struct gfs2_meta_header); x < sbd.bsize; x += ea.ea_rec_len) { eol(0); gfs2_ea_header_in(&ea, ebh->b_data + x); gfs2_ea_header_print(&ea, ebh->b_data + x + sizeof(struct gfs2_ea_header)); } } static void gfs2_inum_print2(const char *title,struct gfs2_inum *no) { if (termlines) { check_highlight(TRUE); move(line,2); printw(title); check_highlight(FALSE); } else printf(" %s:",title); pv2(no, no_formal_ino, "%llu", "0x%llx"); if (!termlines) printf(" addr:"); pv2(no, no_addr, "%llu", "0x%llx"); } /** * gfs2_sb_print2 - Print out a superblock * @sb: the cpu-order buffer */ static void gfs2_sb_print2(struct gfs2_sb *sbp2) { gfs2_meta_header_print(&sbp2->sb_header); pv(sbp2, sb_fs_format, "%u", "0x%x"); pv(sbp2, sb_multihost_format, "%u", "0x%x"); if (sbd.gfs1) pv(sbd1, sb_flags, "%u", "0x%x"); pv(sbp2, sb_bsize, "%u", "0x%x"); pv(sbp2, sb_bsize_shift, "%u", "0x%x"); if (sbd.gfs1) { pv(sbd1, sb_seg_size, "%u", "0x%x"); gfs2_inum_print2("jindex ino", &sbd1->sb_jindex_di); gfs2_inum_print2("rindex ino", &sbd1->sb_rindex_di); } else gfs2_inum_print2("master dir", &sbp2->sb_master_dir); gfs2_inum_print2("root dir ", &sbp2->sb_root_dir); pv(sbp2, sb_lockproto, "%s", NULL); pv(sbp2, sb_locktable, "%s", NULL); if (sbd.gfs1) { gfs2_inum_print2("quota ino ", &gfs1_quota_di); gfs2_inum_print2("license ", &gfs1_license_di); } #ifdef GFS2_HAS_UUID print_it(" sb_uuid", "%s", NULL, str_uuid(sbp2->sb_uuid)); #endif } /** * gfs1_rgrp_in - read in a gfs1 rgrp */ static void gfs1_rgrp_in(struct gfs1_rgrp *rgrp, struct gfs2_buffer_head *rbh) { struct gfs1_rgrp *str = (struct gfs1_rgrp *)rbh->b_data; gfs2_meta_header_in(&rgrp->rg_header, rbh); rgrp->rg_flags = be32_to_cpu(str->rg_flags); rgrp->rg_free = be32_to_cpu(str->rg_free); rgrp->rg_useddi = be32_to_cpu(str->rg_useddi); rgrp->rg_freedi = be32_to_cpu(str->rg_freedi); gfs2_inum_in(&rgrp->rg_freedi_list, (char 
*)&str->rg_freedi_list); rgrp->rg_usedmeta = be32_to_cpu(str->rg_usedmeta); rgrp->rg_freemeta = be32_to_cpu(str->rg_freemeta); memcpy(rgrp->rg_reserved, str->rg_reserved, 64); } /** * gfs_rgrp_print - Print out a resource group header */ static void gfs1_rgrp_print(struct gfs1_rgrp *rg) { gfs2_meta_header_print(&rg->rg_header); pv(rg, rg_flags, "%u", "0x%x"); pv(rg, rg_free, "%u", "0x%x"); pv(rg, rg_useddi, "%u", "0x%x"); pv(rg, rg_freedi, "%u", "0x%x"); gfs2_inum_print(&rg->rg_freedi_list); pv(rg, rg_usedmeta, "%u", "0x%x"); pv(rg, rg_freemeta, "%u", "0x%x"); } int display_gfs2(void) { struct gfs2_meta_header mh; struct gfs2_rgrp rg; struct gfs2_leaf lf; struct gfs_log_header lh1; struct gfs2_log_header lh; struct gfs2_log_descriptor ld; struct gfs2_quota_change qc; uint32_t magic; magic = be32_to_cpu(*(uint32_t *)bh->b_data); switch (magic) { case GFS2_MAGIC: gfs2_meta_header_in(&mh, bh); if (mh.mh_type > GFS2_METATYPE_QC) print_gfs2("Unknown metadata type"); else print_gfs2("%s:", block_type_str[mh.mh_type]); eol(0); switch (mh.mh_type) { case GFS2_METATYPE_SB: gfs2_sb_in(&sbd.sd_sb, bh); gfs2_sb_print2(&sbd.sd_sb); break; case GFS2_METATYPE_RG: if (sbd.gfs1) { struct gfs1_rgrp rg1; gfs1_rgrp_in(&rg1, bh); gfs1_rgrp_print(&rg1); } else { gfs2_rgrp_in(&rg, bh); gfs2_rgrp_print(&rg); } break; case GFS2_METATYPE_RB: gfs2_meta_header_print(&mh); break; case GFS2_METATYPE_DI: gfs2_dinode_print(&di); break; case GFS2_METATYPE_IN: gfs2_meta_header_print(&mh); break; case GFS2_METATYPE_LF: gfs2_leaf_in(&lf, bh); gfs2_leaf_print(&lf); break; case GFS2_METATYPE_JD: gfs2_meta_header_print(&mh); break; case GFS2_METATYPE_LH: if (sbd.gfs1) { gfs_log_header_in(&lh1, bh); gfs_log_header_print(&lh1); } else { gfs2_log_header_in(&lh, bh); gfs2_log_header_print(&lh); } break; case GFS2_METATYPE_LD: gfs2_log_descriptor_in(&ld, bh); gfs2_log_descriptor_print(&ld); break; case GFS2_METATYPE_EA: do_eattr_extended(bh); break; case GFS2_METATYPE_ED: gfs2_meta_header_print(&mh); break; 
case GFS2_METATYPE_LB: gfs2_meta_header_print(&mh); break; case GFS2_METATYPE_QC: gfs2_quota_change_in(&qc, bh); gfs2_quota_change_print(&qc); break; default: break; } break; default: print_gfs2("Unknown block type"); eol(0); break; }; return(0); } gfs2-utils/gfs2/edit/gfs2hex.h0000664000175000017500000000062512110647577015051 0ustar andyandy#ifndef __GFS2HEX_DOT_H__ #define __GFS2HEX_DOT_H__ #include "hexedit.h" extern int display_gfs2(void); extern int edit_gfs2(void); extern void do_dinode_extended(struct gfs2_dinode *di, struct gfs2_buffer_head *lbh); extern void print_gfs2(const char *fmt, ...); extern uint64_t do_leaf_extended(char *dlebuf, struct iinfo *indir); extern void eol(int col); #endif /* __GFS2HEX_DOT_H__ */ gfs2-utils/gfs2/edit/hexedit.c0000664000175000017500000030257612173233423015130 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "copyright.cf" #include "hexedit.h" #include "libgfs2.h" #include "gfs2hex.h" #include "extended.h" const char *mtypes[] = {"none", "sb", "rg", "rb", "di", "in", "lf", "jd", "lh", "ld", "ea", "ed", "lb", "13", "qc"}; const char *allocdesc[2][5] = { {"Free ", "Data ", "Unlnk", "Meta ", "Resrv"}, {"Free ", "Data ", "FreeM", "Meta ", "Resrv"},}; struct gfs2_rgrp *lrgrp; struct gfs2_meta_header *lmh; struct gfs2_dinode *ldi; struct gfs2_leaf *lleaf; struct gfs2_log_header *llh; struct gfs2_log_descriptor *lld; int pgnum; int details = 0; long int gziplevel = 9; static int termcols; int display(int identify_only); /* for assigning numeric fields: */ #define checkassign(strfield, struct, member, value) do { \ if (strcmp(#member, strfield) == 0) { \ struct->member = (typeof(struct->member)) value; \ return 0; \ } \ } while(0) /* for assigning string fields: */ #define checkassigns(strfield, struct, member, val) do { \ if (strcmp(#member, strfield) == 
0) { \ memset(struct->member, 0, sizeof(struct->member)); \ strncpy((char *)struct->member, (char *)val, \ sizeof(struct->member)); \ return 0; \ } \ } while(0) /* for printing numeric fields: */ #define checkprint(strfield, struct, member) do { \ if (strcmp(#member, strfield) == 0) { \ if (dmode == HEX_MODE) \ printf("0x%llx\n", \ (unsigned long long)struct->member); \ else \ printf("%llu\n", \ (unsigned long long)struct->member); \ return 0; \ } \ } while(0) /* for printing string fields: */ #define checkprints(strfield, struct, member) do { \ if (strcmp(#member, strfield) == 0) { \ printf("%s\n", struct->member); \ return 0; \ } \ } while(0) /* ------------------------------------------------------------------------- * field-related functions: * ------------------------------------------------------------------------- */ static int gfs2_sb_printval(struct gfs2_sb *lsb, const char *strfield) { checkprint(strfield, lsb, sb_fs_format); checkprint(strfield, lsb, sb_multihost_format); checkprint(strfield, lsb, __pad0); checkprint(strfield, lsb, sb_bsize); checkprint(strfield, lsb, sb_bsize_shift); checkprint(strfield, lsb, __pad1); checkprint(strfield, lsb, sb_master_dir.no_addr); checkprint(strfield, lsb, __pad2.no_addr); checkprint(strfield, lsb, sb_root_dir.no_addr); checkprints(strfield, lsb, sb_lockproto); checkprints(strfield, lsb, sb_locktable); checkprint(strfield, lsb, __pad3.no_addr); checkprint(strfield, lsb, __pad4.no_addr); if (strcmp(strfield, "sb_uuid") == 0) { printf("%s\n", str_uuid(lsb->sb_uuid)); return 0; } return -1; } static int gfs2_sb_assignval(struct gfs2_sb *lsb, const char *strfield, uint64_t value) { checkassign(strfield, lsb, sb_fs_format, value); checkassign(strfield, lsb, sb_multihost_format, value); checkassign(strfield, lsb, __pad0, value); checkassign(strfield, lsb, sb_bsize, value); checkassign(strfield, lsb, sb_bsize_shift, value); checkassign(strfield, lsb, __pad1, value); checkassign(strfield, lsb, sb_master_dir.no_addr, value); 
checkassign(strfield, lsb, __pad2.no_addr, value); checkassign(strfield, lsb, sb_root_dir.no_addr, value); checkassign(strfield, lsb, __pad3.no_addr, value); checkassign(strfield, lsb, __pad4.no_addr, value); return -1; } static int gfs2_sb_assigns(struct gfs2_sb *lsb, const char *strfield, const char *val) { checkassigns(strfield, lsb, sb_lockproto, val); checkassigns(strfield, lsb, sb_locktable, val); checkassigns(strfield, lsb, sb_uuid, val); return -1; } static int gfs2_dinode_printval(struct gfs2_dinode *dip, const char *strfield) { checkprint(strfield, dip, di_mode); checkprint(strfield, dip, di_uid); checkprint(strfield, dip, di_gid); checkprint(strfield, dip, di_nlink); checkprint(strfield, dip, di_size); checkprint(strfield, dip, di_blocks); checkprint(strfield, dip, di_atime); checkprint(strfield, dip, di_mtime); checkprint(strfield, dip, di_ctime); checkprint(strfield, dip, di_major); checkprint(strfield, dip, di_minor); checkprint(strfield, dip, di_goal_meta); checkprint(strfield, dip, di_goal_data); checkprint(strfield, dip, di_flags); checkprint(strfield, dip, di_payload_format); checkprint(strfield, dip, di_height); checkprint(strfield, dip, di_depth); checkprint(strfield, dip, di_entries); checkprint(strfield, dip, di_eattr); return -1; } static int gfs2_dinode_assignval(struct gfs2_dinode *dia, const char *strfield, uint64_t value) { checkassign(strfield, dia, di_mode, value); checkassign(strfield, dia, di_uid, value); checkassign(strfield, dia, di_gid, value); checkassign(strfield, dia, di_nlink, value); checkassign(strfield, dia, di_size, value); checkassign(strfield, dia, di_blocks, value); checkassign(strfield, dia, di_atime, value); checkassign(strfield, dia, di_mtime, value); checkassign(strfield, dia, di_ctime, value); checkassign(strfield, dia, di_major, value); checkassign(strfield, dia, di_minor, value); checkassign(strfield, dia, di_goal_meta, value); checkassign(strfield, dia, di_goal_data, value); checkassign(strfield, dia, di_flags, 
value); checkassign(strfield, dia, di_payload_format, value); checkassign(strfield, dia, di_height, value); checkassign(strfield, dia, di_depth, value); checkassign(strfield, dia, di_entries, value); checkassign(strfield, dia, di_eattr, value); return -1; } static int gfs2_rgrp_printval(struct gfs2_rgrp *rg, const char *strfield) { checkprint(strfield, rg, rg_flags); checkprint(strfield, rg, rg_free); checkprint(strfield, rg, rg_dinodes); return -1; } static int gfs2_rgrp_assignval(struct gfs2_rgrp *rg, const char *strfield, uint64_t value) { checkassign(strfield, rg, rg_flags, value); checkassign(strfield, rg, rg_free, value); checkassign(strfield, rg, rg_dinodes, value); return -1; } static int gfs2_leaf_printval(struct gfs2_leaf *lf, const char *strfield) { checkprint(strfield, lf, lf_depth); checkprint(strfield, lf, lf_entries); checkprint(strfield, lf, lf_dirent_format); checkprint(strfield, lf, lf_next); checkprints(strfield, lf, lf_reserved); return -1; } static int gfs2_leaf_assignval(struct gfs2_leaf *lf, const char *strfield, uint64_t value) { checkassign(strfield, lf, lf_depth, value); checkassign(strfield, lf, lf_entries, value); checkassign(strfield, lf, lf_dirent_format, value); checkassign(strfield, lf, lf_next, value); return -1; } static int gfs2_leaf_assigns(struct gfs2_leaf *lf, const char *strfield, const char *val) { checkassigns(strfield, lf, lf_reserved, val); return -1; } static int gfs2_lh_printval(struct gfs2_log_header *lh, const char *strfield) { checkprint(strfield, lh, lh_sequence); checkprint(strfield, lh, lh_flags); checkprint(strfield, lh, lh_tail); checkprint(strfield, lh, lh_blkno); checkprint(strfield, lh, lh_hash); return -1; } static int gfs2_lh_assignval(struct gfs2_log_header *lh, const char *strfield, uint64_t value) { checkassign(strfield, lh, lh_sequence, value); checkassign(strfield, lh, lh_flags, value); checkassign(strfield, lh, lh_tail, value); checkassign(strfield, lh, lh_blkno, value); checkassign(strfield, lh, 
lh_hash, value); return -1; } static int gfs2_ld_printval(struct gfs2_log_descriptor *ld, const char *strfield) { checkprint(strfield, ld, ld_type); checkprint(strfield, ld, ld_length); checkprint(strfield, ld, ld_data1); checkprint(strfield, ld, ld_data2); checkprints(strfield, ld, ld_reserved); return -1; } static int gfs2_ld_assignval(struct gfs2_log_descriptor *ld, const char *strfield, uint64_t value) { checkassign(strfield, ld, ld_type, value); checkassign(strfield, ld, ld_length, value); checkassign(strfield, ld, ld_data1, value); checkassign(strfield, ld, ld_data2, value); return -1; } static int gfs2_ld_assigns(struct gfs2_log_descriptor *ld, const char *strfield, const char *val) { checkassigns(strfield, ld, ld_reserved, val); return -1; } static int gfs2_qc_printval(struct gfs2_quota_change *qc, const char *strfield) { checkprint(strfield, qc, qc_change); checkprint(strfield, qc, qc_flags); checkprint(strfield, qc, qc_id); return -1; } static int gfs2_qc_assignval(struct gfs2_quota_change *qc, const char *strfield, uint64_t value) { checkassign(strfield, qc, qc_change, value); checkassign(strfield, qc, qc_flags, value); checkassign(strfield, qc, qc_id, value); return -1; } /* ------------------------------------------------------------------------- */ /* erase - clear the screen */ /* ------------------------------------------------------------------------- */ static void Erase(void) { bkgd(A_NORMAL|COLOR_PAIR(COLOR_NORMAL)); /* clear();*/ /* doesn't set background correctly */ erase(); /*bkgd(bg);*/ } /* ------------------------------------------------------------------------- */ /* display_title_lines */ /* ------------------------------------------------------------------------- */ static void display_title_lines(void) { Erase(); COLORS_TITLE; move(0, 0); printw("%-80s",TITLE1); move(termlines, 0); printw("%-79s",TITLE2); COLORS_NORMAL; } /* ------------------------------------------------------------------------- */ /* bobgets - get a string */ /* 
returns: 1 if user exited by hitting enter */ /* 0 if user exited by hitting escape */ /* ------------------------------------------------------------------------- */ static int bobgets(char string[],int x,int y,int sz,int *ch) { int done,runningy,rc; move(x,y); done=FALSE; COLORS_INVERSE; move(x,y); addstr(string); move(x,y); curs_set(2); refresh(); runningy=y; rc=0; while (!done) { *ch = getch(); if(*ch < 0x0100 && isprint(*ch)) { char *p=string+strlen(string); // end of the string *(p+1)='\0'; while (insert && p > &string[runningy-y]) { *p=*(p-1); p--; } string[runningy-y]=*ch; runningy++; move(x,y); addstr(string); if (runningy-y >= sz) { rc=1; *ch = KEY_RIGHT; done = TRUE; } } else { // special character, is it one we recognize? switch(*ch) { case(KEY_ENTER): case('\n'): case('\r'): rc=1; done=TRUE; string[runningy-y] = '\0'; break; case(KEY_CANCEL): case(0x01B): rc=0; done=TRUE; break; case(KEY_LEFT): if (dmode == HEX_MODE) { done = TRUE; rc = 1; } else runningy--; break; case(KEY_RIGHT): if (dmode == HEX_MODE) { done = TRUE; rc = 1; } else runningy++; break; case(KEY_DC): case(0x07F): if (runningy>=y) { char *p; p = &string[runningy - y]; while (*p) { *p = *(p + 1); p++; } *p = '\0'; runningy--; // remove the character from the string move(x,y); addstr(string); COLORS_NORMAL; addstr(" "); COLORS_INVERSE; runningy++; } break; case(KEY_BACKSPACE): if (runningy>y) { char *p; p = &string[runningy - y - 1]; while (*p) { *p = *(p + 1); p++; } *p='\0'; runningy--; // remove the character from the string move(x,y); addstr(string); COLORS_NORMAL; addstr(" "); COLORS_INVERSE; } break; case KEY_DOWN: // Down rc=0x5000U; done=TRUE; break; case KEY_UP: // Up rc=0x4800U; done=TRUE; break; case 0x014b: insert=!insert; move(0,68); if (insert) printw("insert "); else printw("replace"); break; default: move(0,70); printw("%08x",*ch); // ignore all other characters break; } // end switch on non-printable character } // end non-printable character move(x,runningy); refresh(); } 
// while !done if (sz>0) string[sz]='\0'; COLORS_NORMAL; return rc; }/* bobgets */ /****************************************************************************** ** instr - instructions ******************************************************************************/ static void gfs2instr(const char *s1, const char *s2) { COLORS_HIGHLIGHT; move(line,0); printw(s1); COLORS_NORMAL; move(line,17); printw(s2); line++; } /****************************************************************************** ******************************************************************************* ** ** void print_usage() ** ** Description: ** This routine prints out the appropriate commands for this application. ** ******************************************************************************* ******************************************************************************/ static void print_usage(void) { int ch; line = 2; Erase(); display_title_lines(); move(line++,0); printw("Supported commands: (roughly conforming to the rules of 'less')"); line++; move(line++,0); printw("Navigation:"); gfs2instr("/","Move up or down one screen full"); gfs2instr("/","Move up or down one line"); gfs2instr("/","Move left or right one byte"); gfs2instr("","Return to the superblock."); gfs2instr(" f","Forward one 4K block"); gfs2instr(" b","Backward one 4K block"); gfs2instr(" g","Goto a given block (number, master, root, rindex, jindex, etc)"); gfs2instr(" j","Jump to the highlighted 64-bit block number."); gfs2instr(" ","(You may also arrow up to the block number and hit enter)"); gfs2instr("","Return to a previous block (a block stack is kept)"); gfs2instr("","Jump forward to block before backspace (opposite of backspace)"); line++; move(line++, 0); printw("Other commands:"); gfs2instr(" h","This Help display"); gfs2instr(" c","Toggle the color scheme"); gfs2instr(" m","Switch display mode: hex -> GFS2 structure -> Extended"); gfs2instr(" q","Quit (same as hitting key)"); gfs2instr("","Edit a value (enter 
to save, esc to discard)"); gfs2instr(" ","(Currently only works on the hex display)"); gfs2instr("","Quit the program"); line++; move(line++, 0); printw("Notes: Areas shown in red are outside the bounds of the struct/file."); move(line++, 0); printw(" Areas shown in blue are file contents."); move(line++, 0); printw(" Characters shown in green are selected for edit on ."); move(line++, 0); move(line++, 0); printw("Press any key to return."); refresh(); while ((ch=getch()) == 0); // wait for input Erase(); } /* ------------------------------------------------------------------------ */ /* get_block_type */ /* returns: metatype if block is a GFS2 structure block type */ /* 0 if block is not a GFS2 structure */ /* ------------------------------------------------------------------------ */ static uint32_t get_block_type(const struct gfs2_buffer_head *lbh) { const struct gfs2_meta_header *mh = lbh->iov.iov_base; if (be32_to_cpu(mh->mh_magic) == GFS2_MAGIC) return be32_to_cpu(mh->mh_type); return 0; } /* ------------------------------------------------------------------------ */ /* display_block_type */ /* returns: metatype if block is a GFS2 structure block type */ /* 0 if block is not a GFS2 structure */ /* ------------------------------------------------------------------------ */ int display_block_type(int from_restore) { const struct gfs2_meta_header *mh; int ret_type = 0; /* return type */ /* first, print out the kind of GFS2 block this is */ if (termlines) { line = 1; move(line, 0); } print_gfs2("Block #"); if (termlines) { if (edit_row[dmode] == -1) COLORS_HIGHLIGHT; } if (block == RGLIST_DUMMY_BLOCK) print_gfs2("RG List "); else print_gfs2("%lld (0x%llx)", block, block); if (termlines) { if (edit_row[dmode] == -1) COLORS_NORMAL; } print_gfs2(" "); if (!from_restore) print_gfs2("of %llu (0x%llx) ", max_block, max_block); if (block == RGLIST_DUMMY_BLOCK) { ret_type = GFS2_METATYPE_RG; struct_len = sbd.gfs1 ? 
sizeof(struct gfs_rgrp) : sizeof(struct gfs2_rgrp); } else { ret_type = get_block_type(bh); switch (ret_type) { case GFS2_METATYPE_SB: /* 1 */ print_gfs2("(superblock)"); if (sbd.gfs1) struct_len = sizeof(struct gfs_sb); else struct_len = sizeof(struct gfs2_sb); break; case GFS2_METATYPE_RG: /* 2 */ print_gfs2("(rsrc grp hdr)"); struct_len = sizeof(struct gfs2_rgrp); break; case GFS2_METATYPE_RB: /* 3 */ print_gfs2("(rsrc grp bitblk)"); struct_len = sizeof(struct gfs2_meta_header); break; case GFS2_METATYPE_DI: /* 4 */ print_gfs2("(disk inode)"); struct_len = sizeof(struct gfs2_dinode); break; case GFS2_METATYPE_IN: /* 5 */ print_gfs2("(indir blklist)"); if (sbd.gfs1) struct_len = sizeof(struct gfs_indirect); else struct_len = sizeof(struct gfs2_meta_header); break; case GFS2_METATYPE_LF: /* 6 */ print_gfs2("(directory leaf)"); struct_len = sizeof(struct gfs2_leaf); break; case GFS2_METATYPE_JD: print_gfs2("(journal data)"); struct_len = sizeof(struct gfs2_meta_header); break; case GFS2_METATYPE_LH: print_gfs2("(log header)"); struct_len = sizeof(struct gfs2_log_header); break; case GFS2_METATYPE_LD: print_gfs2("(log descriptor)"); if (sbd.gfs1) struct_len = sizeof(struct gfs_log_descriptor); else struct_len = sizeof(struct gfs2_log_descriptor); break; case GFS2_METATYPE_EA: print_gfs2("(extended attr hdr)"); struct_len = sizeof(struct gfs2_meta_header) + sizeof(struct gfs2_ea_header); break; case GFS2_METATYPE_ED: print_gfs2("(extended attr data)"); struct_len = sizeof(struct gfs2_meta_header) + sizeof(struct gfs2_ea_header); break; case GFS2_METATYPE_LB: print_gfs2("(log buffer)"); struct_len = sizeof(struct gfs2_meta_header); break; case GFS2_METATYPE_QC: print_gfs2("(quota change)"); struct_len = sizeof(struct gfs2_quota_change); break; case 0: struct_len = sbd.bsize; break; default: print_gfs2("(wtf?)"); struct_len = sbd.bsize; break; } } mh = bh->iov.iov_base; eol(0); if (from_restore) return ret_type; if (termlines && dmode == HEX_MODE) { int type; struct 
rgrp_tree *rgd; rgd = gfs2_blk2rgrpd(&sbd, block); if (rgd) { gfs2_rgrp_read(&sbd, rgd); if ((be32_to_cpu(mh->mh_type) == GFS2_METATYPE_RG) || (be32_to_cpu(mh->mh_type) == GFS2_METATYPE_RB)) type = 4; else { type = lgfs2_get_bitmap(&sbd, block, rgd); } } else type = 4; screen_chunk_size = ((termlines - 4) * 16) >> 8 << 8; if (!screen_chunk_size) screen_chunk_size = 256; pgnum = (offset / screen_chunk_size); if (type >= 0) { print_gfs2("(p.%d of %d--%s)", pgnum + 1, (sbd.bsize % screen_chunk_size) > 0 ? sbd.bsize / screen_chunk_size + 1 : sbd.bsize / screen_chunk_size, allocdesc[sbd.gfs1][type]); } /*eol(9);*/ if ((be32_to_cpu(mh->mh_type) == GFS2_METATYPE_RG)) { int ptroffset = edit_row[dmode] * 16 + edit_col[dmode]; if (rgd && (ptroffset >= struct_len || pgnum)) { int blknum, b, btype; blknum = pgnum * screen_chunk_size; blknum += (ptroffset - struct_len); blknum *= 4; blknum += rgd->ri.ri_data0; print_gfs2(" blk "); for (b = blknum; b < blknum + 4; b++) { btype = lgfs2_get_bitmap(&sbd, b, rgd); if (btype >= 0) { print_gfs2("0x%x-%s ", b, allocdesc[sbd.gfs1][btype]); } } } } else if ((be32_to_cpu(mh->mh_type) == GFS2_METATYPE_RB)) { int ptroffset = edit_row[dmode] * 16 + edit_col[dmode]; if (rgd && (ptroffset >= struct_len || pgnum)) { int blknum, b, btype, rb_number; rb_number = block - rgd->ri.ri_addr; blknum = 0; /* count the number of bytes representing blocks prior to the displayed screen. */ for (b = 0; b < rb_number; b++) { struct_len = (b ? 
sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_rgrp)); blknum += (sbd.bsize - struct_len); } struct_len = sizeof(struct gfs2_meta_header); /* add the number of bytes on this screen */ blknum += (ptroffset - struct_len); /* factor in the page number */ blknum += pgnum * screen_chunk_size; /* convert bytes to blocks */ blknum *= GFS2_NBBY; /* add the starting offset for this rgrp */ blknum += rgd->ri.ri_data0; print_gfs2(" blk "); for (b = blknum; b < blknum + 4; b++) { btype = lgfs2_get_bitmap(&sbd, b, rgd); if (btype >= 0) { print_gfs2("0x%x-%s ", b, allocdesc[sbd.gfs1][btype]); } } } } if (rgd) gfs2_rgrp_relse(rgd); } if (block == sbd.sd_sb.sb_root_dir.no_addr) print_gfs2("--------------- Root directory ------------------"); else if (!sbd.gfs1 && block == sbd.sd_sb.sb_master_dir.no_addr) print_gfs2("-------------- Master directory -----------------"); else if (!sbd.gfs1 && block == RGLIST_DUMMY_BLOCK) print_gfs2("------------------ RG List ----------------------"); else { if (sbd.gfs1) { if (block == sbd1->sb_rindex_di.no_addr) print_gfs2("---------------- rindex file -------------------"); else if (block == gfs1_quota_di.no_addr) print_gfs2("---------------- Quota file --------------------"); else if (block == sbd1->sb_jindex_di.no_addr) print_gfs2("--------------- Journal Index ------------------"); else if (block == gfs1_license_di.no_addr) print_gfs2("--------------- License file -------------------"); } else { int d; for (d = 2; d < 8; d++) { if (block == masterdir.dirent[d].block) { if (!strncmp(masterdir.dirent[d].filename, "jindex", 6)) print_gfs2("--------------- Journal Index ------------------"); else if (!strncmp(masterdir.dirent[d].filename, "per_node", 8)) print_gfs2("--------------- Per-node Dir -------------------"); else if (!strncmp(masterdir.dirent[d].filename, "inum", 4)) print_gfs2("---------------- Inum file ---------------------"); else if (!strncmp(masterdir.dirent[d].filename, "statfs", 6)) print_gfs2("---------------- statfs file 
-------------------"); else if (!strncmp(masterdir.dirent[d].filename, "rindex", 6)) print_gfs2("---------------- rindex file -------------------"); else if (!strncmp(masterdir.dirent[d].filename, "quota", 5)) print_gfs2("---------------- Quota file --------------------"); } } } } eol(0); return ret_type; } static const struct lgfs2_metadata *find_mtype(uint32_t mtype, const unsigned versions) { const struct lgfs2_metadata *m = lgfs2_metadata; unsigned n = 0; do { if ((m[n].versions & versions) && m[n].mh_type == mtype) return &m[n]; n++; } while (n < lgfs2_metadata_size); return NULL; } /* ------------------------------------------------------------------------ */ /* hexdump - hex dump the filesystem block to the screen */ /* ------------------------------------------------------------------------ */ static int hexdump(uint64_t startaddr, int len) { const unsigned char *pointer,*ptr2; int i; uint64_t l; const char *lpBuffer = bh->b_data; int print_field, cursor_line; const uint32_t block_type = get_block_type(bh); strcpy(edit_fmt,"%02x"); pointer = (unsigned char *)lpBuffer + offset; ptr2 = (unsigned char *)lpBuffer + offset; l = offset; print_entry_ndx = 0; while (((termlines && line < termlines && line <= ((screen_chunk_size / 16) + 2)) || (!termlines && l < len)) && l < sbd.bsize) { int ptr_not_null = 0; if (termlines) { move(line, 0); COLORS_OFFSETS; /* cyan for offsets */ } if (startaddr < 0xffffffff) print_gfs2("%.8llx", startaddr + l); else print_gfs2("%.16llx", startaddr + l); if (termlines) { if (l < struct_len) COLORS_NORMAL; /* normal part of structure */ else if (gfs2_struct_type == GFS2_METATYPE_DI && l < struct_len + di.di_size) COLORS_CONTENTS; /* after struct but not eof */ else COLORS_SPECIAL; /* beyond end of the struct */ } print_field = -1; cursor_line = 0; for (i = 0; i < 16; i++) { /* first print it in hex */ /* Figure out if we have a null pointer--for colors */ if (((gfs2_struct_type == GFS2_METATYPE_IN) || (gfs2_struct_type == 
GFS2_METATYPE_DI && l < struct_len + di.di_size && (di.di_height > 0 || !S_ISREG(di.di_mode)))) && (i==0 || i==8)) { int j; ptr_not_null = 0; for (j = 0; j < 8; j++) { if (*(pointer + j)) { ptr_not_null = 1; break; } } } if (termlines) { if (l + i < struct_len) COLORS_NORMAL; /* in the structure */ else if (gfs2_struct_type == GFS2_METATYPE_DI && l + i < struct_len + di.di_size) { if ((!di.di_height && S_ISREG(di.di_mode)) || !ptr_not_null) COLORS_CONTENTS;/*stuff data */ else COLORS_SPECIAL;/* non-null */ } else if (gfs2_struct_type == GFS2_METATYPE_IN){ if (ptr_not_null) COLORS_SPECIAL;/* non-null */ else COLORS_CONTENTS;/* null */ } else COLORS_SPECIAL; /* past the struct */ } if (i%4 == 0) print_gfs2(" "); if (termlines && line == edit_row[dmode] + 3 && i == edit_col[dmode]) { COLORS_HIGHLIGHT; /* in the structure */ memset(estring,0,3); sprintf(estring,"%02x",*pointer); cursor_line = 1; print_field = (char *)pointer - bh->b_data; } print_gfs2("%02x",*pointer); if (termlines && line == edit_row[dmode] + 3 && i == edit_col[dmode]) { if (l < struct_len + offset) COLORS_NORMAL; /* in the structure */ else COLORS_SPECIAL; /* beyond structure */ } pointer++; } print_gfs2(" ["); for (i=0; i<16; i++) { /* now print it in character format */ if ((*ptr2 >=' ') && (*ptr2 <= '~')) print_gfs2("%c",*ptr2); else print_gfs2("."); ptr2++; } print_gfs2("] "); if (print_field >= 0) { const struct lgfs2_metadata *m = find_mtype(block_type, sbd.gfs1 ? 
LGFS2_MD_GFS1 : LGFS2_MD_GFS2); if (m) { const struct lgfs2_metafield *f; unsigned n; for (n = 0; n < m->nfields; n++) { f = &m->fields[n]; if (print_field >= f->offset && print_field < (f->offset + f->length)) { print_gfs2(m->fields[n].name); break; } } } } if (cursor_line) { if (block_type == GFS2_METATYPE_IN || ((block_type == GFS2_METATYPE_DI) && ((struct gfs2_dinode*)bh->b_data)->di_height) || S_ISDIR(di.di_mode)) { int ptroffset = edit_row[dmode] * 16 + edit_col[dmode]; if (ptroffset >= struct_len || pgnum) { int pnum; pnum = pgnum * screen_chunk_size; pnum += (ptroffset - struct_len); pnum /= sizeof(uint64_t); print_gfs2("pointer 0x%x", pnum); } } } if (line - 3 > last_entry_onscreen[dmode]) last_entry_onscreen[dmode] = line - 3; eol(0); l+=16; print_entry_ndx++; } /* while */ if (sbd.gfs1) { COLORS_NORMAL; print_gfs2(" *** This seems to be a GFS-1 file system ***"); eol(0); } return (offset+len); }/* hexdump */ /* ------------------------------------------------------------------------ */ /* masterblock - find a file (by name) in the master directory and return */ /* its block number. */ /* ------------------------------------------------------------------------ */ uint64_t masterblock(const char *fn) { int d; for (d = 2; d < 8; d++) if (!strncmp(masterdir.dirent[d].filename, fn, strlen(fn))) return (masterdir.dirent[d].block); return 0; } /* ------------------------------------------------------------------------ */ /* rgcount - return how many rgrps there are. 
*/ /* ------------------------------------------------------------------------ */ static void rgcount(void) { printf("%lld RGs in this file system.\n", (unsigned long long)sbd.md.riinode->i_di.di_size / sizeof(struct gfs2_rindex)); inode_put(&sbd.md.riinode); gfs2_rgrp_free(&sbd.rgtree); exit(EXIT_SUCCESS); } /* ------------------------------------------------------------------------ */ /* find_rgrp_block - locate the block for a given rgrp number */ /* ------------------------------------------------------------------------ */ static uint64_t find_rgrp_block(struct gfs2_inode *dif, int rg) { int amt; struct gfs2_rindex fbuf, ri; uint64_t foffset, gfs1_adj = 0; foffset = rg * sizeof(struct gfs2_rindex); if (sbd.gfs1) { uint64_t sd_jbsize = (sbd.bsize - sizeof(struct gfs2_meta_header)); gfs1_adj = (foffset / sd_jbsize) * sizeof(struct gfs2_meta_header); gfs1_adj += sizeof(struct gfs2_meta_header); } amt = gfs2_readi(dif, (void *)&fbuf, foffset + gfs1_adj, sizeof(struct gfs2_rindex)); if (!amt) /* end of file */ return 0; gfs2_rindex_in(&ri, (void *)&fbuf); return ri.ri_addr; } /* ------------------------------------------------------------------------ */ /* gfs_rgrp_print - print a gfs1 resource group */ /* ------------------------------------------------------------------------ */ void gfs_rgrp_print(struct gfs_rgrp *rg) { gfs2_meta_header_print(&rg->rg_header); pv(rg, rg_flags, "%u", "0x%x"); pv(rg, rg_free, "%u", "0x%x"); pv(rg, rg_useddi, "%u", "0x%x"); pv(rg, rg_freedi, "%u", "0x%x"); gfs2_inum_print(&rg->rg_freedi_list); pv(rg, rg_usedmeta, "%u", "0x%x"); pv(rg, rg_freemeta, "%u", "0x%x"); } /* ------------------------------------------------------------------------ */ /* get_rg_addr */ /* ------------------------------------------------------------------------ */ static uint64_t get_rg_addr(int rgnum) { uint64_t rgblk = 0, gblock; struct gfs2_inode *riinode; if (sbd.gfs1) gblock = sbd1->sb_rindex_di.no_addr; else gblock = masterblock("rindex"); riinode = 
lgfs2_inode_read(&sbd, gblock); if (riinode == NULL) return 0; if (rgnum < riinode->i_di.di_size / sizeof(struct gfs2_rindex)) rgblk = find_rgrp_block(riinode, rgnum); else fprintf(stderr, "Error: File system only has %lld RGs.\n", (unsigned long long)riinode->i_di.di_size / sizeof(struct gfs2_rindex)); inode_put(&riinode); return rgblk; } /* ------------------------------------------------------------------------ */ /* set_rgrp_flags - Set an rgrp's flags to a given value */ /* rgnum: which rg to print or modify flags for (0 - X) */ /* new_flags: value to set new rg_flags to (if modify == TRUE) */ /* modify: TRUE if the value is to be modified, FALSE if it's to be printed */ /* full: TRUE if the full RG should be printed. */ /* ------------------------------------------------------------------------ */ static void set_rgrp_flags(int rgnum, uint32_t new_flags, int modify, int full) { union { struct gfs2_rgrp rg2; struct gfs_rgrp rg1; } rg; struct gfs2_buffer_head *rbh; uint64_t rgblk; rgblk = get_rg_addr(rgnum); rbh = bread(&sbd, rgblk); if (sbd.gfs1) gfs_rgrp_in(&rg.rg1, rbh); else gfs2_rgrp_in(&rg.rg2, rbh); if (modify) { printf("RG #%d (block %llu / 0x%llx) rg_flags changed from 0x%08x to 0x%08x\n", rgnum, (unsigned long long)rgblk, (unsigned long long)rgblk, rg.rg2.rg_flags, new_flags); rg.rg2.rg_flags = new_flags; if (sbd.gfs1) gfs_rgrp_out(&rg.rg1, rbh); else gfs2_rgrp_out_bh(&rg.rg2, rbh); brelse(rbh); } else { if (full) { print_gfs2("RG #%d", rgnum); print_gfs2(" located at: %llu (0x%llx)", rgblk, rgblk); eol(0); if (sbd.gfs1) gfs_rgrp_print(&rg.rg1); else gfs2_rgrp_print(&rg.rg2); } else printf("RG #%d (block %llu / 0x%llx) rg_flags = 0x%08x\n", rgnum, (unsigned long long)rgblk, (unsigned long long)rgblk, rg.rg2.rg_flags); brelse(rbh); } if (modify) fsync(sbd.device_fd); } /* ------------------------------------------------------------------------ */ /* has_indirect_blocks */ /* ------------------------------------------------------------------------ */ 
int has_indirect_blocks(void) { if (indirect_blocks || gfs2_struct_type == GFS2_METATYPE_SB || gfs2_struct_type == GFS2_METATYPE_LF || (gfs2_struct_type == GFS2_METATYPE_DI && (S_ISDIR(di.di_mode) || (sbd.gfs1 && di.__pad1 == GFS_FILE_DIR)))) return TRUE; return FALSE; } /* ------------------------------------------------------------------------ */ /* block_is_rindex */ /* ------------------------------------------------------------------------ */ int block_is_rindex(void) { if ((sbd.gfs1 && block == sbd1->sb_rindex_di.no_addr) || (block == masterblock("rindex"))) return TRUE; return FALSE; } /* ------------------------------------------------------------------------ */ /* block_is_jindex */ /* ------------------------------------------------------------------------ */ int block_is_jindex(void) { if ((sbd.gfs1 && block == sbd1->sb_jindex_di.no_addr)) return TRUE; return FALSE; } /* ------------------------------------------------------------------------ */ /* block_is_inum_file */ /* ------------------------------------------------------------------------ */ int block_is_inum_file(void) { if (!sbd.gfs1 && block == masterblock("inum")) return TRUE; return FALSE; } /* ------------------------------------------------------------------------ */ /* block_is_statfs_file */ /* ------------------------------------------------------------------------ */ int block_is_statfs_file(void) { if (sbd.gfs1 && block == gfs1_license_di.no_addr) return TRUE; if (!sbd.gfs1 && block == masterblock("statfs")) return TRUE; return FALSE; } /* ------------------------------------------------------------------------ */ /* block_is_quota_file */ /* ------------------------------------------------------------------------ */ int block_is_quota_file(void) { if (sbd.gfs1 && block == gfs1_quota_di.no_addr) return TRUE; if (!sbd.gfs1 && block == masterblock("quota")) return TRUE; return FALSE; } /* ------------------------------------------------------------------------ */ /* block_is_per_node */ 
/* ------------------------------------------------------------------------ */ int block_is_per_node(void) { if (!sbd.gfs1 && block == masterblock("per_node")) return TRUE; return FALSE; } /* ------------------------------------------------------------------------ */ /* block_has_extended_info */ /* ------------------------------------------------------------------------ */ static int block_has_extended_info(void) { if (has_indirect_blocks() || block_is_rindex() || block_is_rgtree() || block_is_jindex() || block_is_inum_file() || block_is_statfs_file() || block_is_quota_file()) return TRUE; return FALSE; } /* ------------------------------------------------------------------------ */ /* read_superblock - read the superblock */ /* ------------------------------------------------------------------------ */ static void read_superblock(int fd) { int count, sane; sbd1 = (struct gfs_sb *)&sbd.sd_sb; ioctl(fd, BLKFLSBUF, 0); memset(&sbd, 0, sizeof(struct gfs2_sbd)); sbd.bsize = GFS2_DEFAULT_BSIZE; sbd.device_fd = fd; bh = bread(&sbd, 0x10); sbd.jsize = GFS2_DEFAULT_JSIZE; sbd.rgsize = GFS2_DEFAULT_RGSIZE; sbd.qcsize = GFS2_DEFAULT_QCSIZE; sbd.time = time(NULL); sbd.rgtree.osi_node = NULL; gfs2_sb_in(&sbd.sd_sb, bh); /* parse it out into the sb structure */ /* Check to see if this is really gfs1 */ if (sbd1->sb_fs_format == GFS_FORMAT_FS && sbd1->sb_header.mh_type == GFS_METATYPE_SB && sbd1->sb_header.mh_format == GFS_FORMAT_SB && sbd1->sb_multihost_format == GFS_FORMAT_MULTI) { struct gfs_sb *sbbuf = (struct gfs_sb *)bh->b_data; sbd.gfs1 = TRUE; sbd1->sb_flags = be32_to_cpu(sbbuf->sb_flags); sbd1->sb_seg_size = be32_to_cpu(sbbuf->sb_seg_size); gfs2_inum_in(&sbd1->sb_rindex_di, (void *)&sbbuf->sb_rindex_di); gfs2_inum_in(&gfs1_quota_di, (void *)&sbbuf->sb_quota_di); gfs2_inum_in(&gfs1_license_di, (void *)&sbbuf->sb_license_di); } else sbd.gfs1 = FALSE; sbd.bsize = sbd.sd_sb.sb_bsize; if (!sbd.bsize) sbd.bsize = GFS2_DEFAULT_BSIZE; if (lgfs2_get_dev_info(fd, &sbd.dinfo)) { 
perror(sbd.device_name); exit(-1); } if(compute_constants(&sbd)) { fprintf(stderr, "Failed to compute constants.\n"); exit(-1); } if (sbd.gfs1 || (sbd.sd_sb.sb_header.mh_magic == GFS2_MAGIC && sbd.sd_sb.sb_header.mh_type == GFS2_METATYPE_SB)) block = 0x10 * (GFS2_DEFAULT_BSIZE / sbd.bsize); else { block = starting_blk = 0; } fix_device_geometry(&sbd); if(sbd.gfs1) { sbd.sd_inptrs = (sbd.bsize - sizeof(struct gfs_indirect)) / sizeof(uint64_t); sbd.sd_diptrs = (sbd.bsize - sizeof(struct gfs_dinode)) / sizeof(uint64_t); sbd.md.riinode = lgfs2_inode_read(&sbd, sbd1->sb_rindex_di.no_addr); } else { sbd.sd_inptrs = (sbd.bsize - sizeof(struct gfs2_meta_header)) / sizeof(uint64_t); sbd.sd_diptrs = (sbd.bsize - sizeof(struct gfs2_dinode)) / sizeof(uint64_t); sbd.master_dir = lgfs2_inode_read(&sbd, sbd.sd_sb.sb_master_dir.no_addr); if (sbd.master_dir == NULL) { sbd.md.riinode = NULL; } else { gfs2_lookupi(sbd.master_dir, "rindex", 6, &sbd.md.riinode); } } sbd.fssize = sbd.device.length; if (sbd.md.riinode) /* If we found the rindex */ rindex_read(&sbd, 0, &count, &sane); } /* ------------------------------------------------------------------------ */ /* read_master_dir - read the master directory */ /* ------------------------------------------------------------------------ */ static void read_master_dir(void) { ioctl(sbd.device_fd, BLKFLSBUF, 0); lseek(sbd.device_fd, sbd.sd_sb.sb_master_dir.no_addr * sbd.bsize, SEEK_SET); if (read(sbd.device_fd, bh->b_data, sbd.bsize) != sbd.bsize) { fprintf(stderr, "read error: %s from %s:%d: " "master dir block %lld (0x%llx)\n", strerror(errno), __FUNCTION__, __LINE__, (unsigned long long)sbd.sd_sb.sb_master_dir.no_addr, (unsigned long long)sbd.sd_sb.sb_master_dir.no_addr); exit(-1); } gfs2_dinode_in(&di, bh); /* parse disk inode into structure */ do_dinode_extended(&di, bh); /* get extended data, if any */ memcpy(&masterdir, &indirect[0], sizeof(struct indirect_info)); } /* 
------------------------------------------------------------------------ */ /* display */ /* ------------------------------------------------------------------------ */ int display(int identify_only) { uint64_t blk; if (block == RGLIST_DUMMY_BLOCK) { if (sbd.gfs1) blk = sbd1->sb_rindex_di.no_addr; else blk = masterblock("rindex"); } else blk = block; if (termlines) { display_title_lines(); move(2,0); } if (block_in_mem != blk) { /* If we changed blocks from the last read */ dev_offset = blk * sbd.bsize; ioctl(sbd.device_fd, BLKFLSBUF, 0); if (!(bh = bread(&sbd, blk))) { fprintf(stderr, "read error: %s from %s:%d: " "offset %lld (0x%llx)\n", strerror(errno), __FUNCTION__, __LINE__, (unsigned long long)dev_offset, (unsigned long long)dev_offset); exit(-1); } block_in_mem = blk; /* remember which block is in memory */ } line = 1; gfs2_struct_type = display_block_type(FALSE); if (identify_only) return 0; indirect_blocks = 0; lines_per_row[dmode] = 1; if (gfs2_struct_type == GFS2_METATYPE_SB || blk == 0x10 * (4096 / sbd.bsize)) { gfs2_sb_in(&sbd.sd_sb, bh); /* parse it out into the sb structure */ memset(indirect, 0, sizeof(struct iinfo)); indirect->ii[0].block = sbd.sd_sb.sb_master_dir.no_addr; indirect->ii[0].is_dir = TRUE; indirect->ii[0].dirents = 2; memcpy(&indirect->ii[0].dirent[0].filename, "root", 4); indirect->ii[0].dirent[0].dirent.de_inum.no_formal_ino = sbd.sd_sb.sb_root_dir.no_formal_ino; indirect->ii[0].dirent[0].dirent.de_inum.no_addr = sbd.sd_sb.sb_root_dir.no_addr; indirect->ii[0].dirent[0].block = sbd.sd_sb.sb_root_dir.no_addr; indirect->ii[0].dirent[0].dirent.de_type = DT_DIR; memcpy(&indirect->ii[0].dirent[1].filename, "master", 7); indirect->ii[0].dirent[1].dirent.de_inum.no_formal_ino = sbd.sd_sb.sb_master_dir.no_formal_ino; indirect->ii[0].dirent[1].dirent.de_inum.no_addr = sbd.sd_sb.sb_master_dir.no_addr; indirect->ii[0].dirent[1].block = sbd.sd_sb.sb_master_dir.no_addr; indirect->ii[0].dirent[1].dirent.de_type = DT_DIR; } else if 
(gfs2_struct_type == GFS2_METATYPE_DI) { gfs2_dinode_in(&di, bh); /* parse disk inode into structure */ do_dinode_extended(&di, bh); /* get extended data, if any */ } else if (gfs2_struct_type == GFS2_METATYPE_IN) { /* indirect block list */ if (blockhist) { int i; for (i = 0; i < 512; i++) memcpy(&indirect->ii[i].mp, &blockstack[blockhist - 1].mp, sizeof(struct metapath)); } indirect_blocks = do_indirect_extended(bh->b_data, indirect); } else if (gfs2_struct_type == GFS2_METATYPE_LF) { /* directory leaf */ do_leaf_extended(bh->b_data, indirect); } last_entry_onscreen[dmode] = 0; if (dmode == EXTENDED_MODE && !block_has_extended_info()) dmode = HEX_MODE; if (termlines) { move(termlines, 63); if (dmode==HEX_MODE) printw("Mode: Hex %s", (editing?"edit ":"view ")); else printw("Mode: %s", (dmode==GFS2_MODE?"Structure": "Pointers ")); move(line, 0); } if (dmode == HEX_MODE) /* if hex display mode */ hexdump(dev_offset, (gfs2_struct_type == GFS2_METATYPE_DI)? struct_len + di.di_size:sbd.bsize); else if (dmode == GFS2_MODE) /* if structure display */ display_gfs2(); /* display the gfs2 structure */ else display_extended(); /* display extended blocks */ /* No else here because display_extended can switch back to hex mode */ if (termlines) refresh(); return(0); } /* ------------------------------------------------------------------------ */ /* push_block - push a block onto the block stack */ /* ------------------------------------------------------------------------ */ static void push_block(uint64_t blk) { int i, bhst; bhst = blockhist % BLOCK_STACK_SIZE; if (blk) { blockstack[bhst].dmode = dmode; for (i = 0; i < DMODES; i++) { blockstack[bhst].start_row[i] = start_row[i]; blockstack[bhst].end_row[i] = end_row[i]; blockstack[bhst].edit_row[i] = edit_row[i]; blockstack[bhst].edit_col[i] = edit_col[i]; blockstack[bhst].lines_per_row[i] = lines_per_row[i]; } blockstack[bhst].gfs2_struct_type = gfs2_struct_type; if (edit_row[dmode] >= 0 && !block_is_rindex()) 
memcpy(&blockstack[bhst].mp, &indirect->ii[edit_row[dmode]].mp, sizeof(struct metapath)); blockhist++; blockstack[blockhist % BLOCK_STACK_SIZE].block = blk; } } /* ------------------------------------------------------------------------ */ /* pop_block - pop a block off the block stack */ /* ------------------------------------------------------------------------ */ static uint64_t pop_block(void) { int i, bhst; if (!blockhist) return block; blockhist--; bhst = blockhist % BLOCK_STACK_SIZE; dmode = blockstack[bhst].dmode; for (i = 0; i < DMODES; i++) { start_row[i] = blockstack[bhst].start_row[i]; end_row[i] = blockstack[bhst].end_row[i]; edit_row[i] = blockstack[bhst].edit_row[i]; edit_col[i] = blockstack[bhst].edit_col[i]; lines_per_row[i] = blockstack[bhst].lines_per_row[i]; } gfs2_struct_type = blockstack[bhst].gfs2_struct_type; return blockstack[bhst].block; } /* ------------------------------------------------------------------------ */ /* find_journal_block - figure out where a journal starts, given the name */ /* Returns: journal block number, changes j_size to the journal size */ /* ------------------------------------------------------------------------ */ static uint64_t find_journal_block(const char *journal, uint64_t *j_size) { int journal_num; uint64_t jindex_block, jblock = 0; int amtread; struct gfs2_buffer_head *jindex_bh, *j_bh; char jbuf[sbd.bsize]; journal_num = atoi(journal + 7); /* Figure out the block of the jindex file */ if (sbd.gfs1) jindex_block = sbd1->sb_jindex_di.no_addr; else jindex_block = masterblock("jindex"); /* read in the block */ jindex_bh = bread(&sbd, jindex_block); /* get the dinode data from it. */ gfs2_dinode_in(&di, jindex_bh); /* parse disk inode to struct*/ if (!sbd.gfs1) do_dinode_extended(&di, jindex_bh); /* parse dir. 
*/ if (sbd.gfs1) { struct gfs2_inode *jiinode; struct gfs_jindex ji; jiinode = lgfs2_inode_get(&sbd, jindex_bh); if (jiinode == NULL) return 0; amtread = gfs2_readi(jiinode, (void *)&jbuf, journal_num * sizeof(struct gfs_jindex), sizeof(struct gfs_jindex)); if (amtread) { gfs_jindex_in(&ji, jbuf); jblock = ji.ji_addr; *j_size = ji.ji_nsegment * 0x10; } inode_put(&jiinode); } else { struct gfs2_dinode jdi; jblock = indirect->ii[0].dirent[journal_num + 2].block; j_bh = bread(&sbd, jblock); gfs2_dinode_in(&jdi, j_bh);/* parse dinode to struct */ *j_size = jdi.di_size; brelse(j_bh); } brelse(jindex_bh); return jblock; } /* ------------------------------------------------------------------------ */ /* Find next metadata block of a given type AFTER a given point in the fs */ /* */ /* This is used to find blocks that aren't represented in the bitmaps, such */ /* as the RGs and bitmaps or the superblock. */ /* ------------------------------------------------------------------------ */ static uint64_t find_metablockoftype_slow(uint64_t startblk, int metatype, int print) { uint64_t blk, last_fs_block; int found = 0; struct gfs2_buffer_head *lbh; last_fs_block = lseek(sbd.device_fd, 0, SEEK_END) / sbd.bsize; for (blk = startblk + 1; blk < last_fs_block; blk++) { lbh = bread(&sbd, blk); /* Can't use get_block_type here (returns false "none") */ if (lbh->b_data[0] == 0x01 && lbh->b_data[1] == 0x16 && lbh->b_data[2] == 0x19 && lbh->b_data[3] == 0x70 && lbh->b_data[4] == 0x00 && lbh->b_data[5] == 0x00 && lbh->b_data[6] == 0x00 && lbh->b_data[7] == metatype) { found = 1; brelse(lbh); break; } brelse(lbh); } if (!found) blk = 0; if (print) { if (dmode == HEX_MODE) printf("0x%llx\n", (unsigned long long)blk); else printf("%llu\n", (unsigned long long)blk); } gfs2_rgrp_free(&sbd.rgtree); if (print) exit(0); return blk; } static int find_rg_metatype(struct rgrp_tree *rgd, uint64_t *blk, uint64_t startblk, int mtype) { int found; unsigned i, j, m; struct gfs2_buffer_head *bhp = NULL; 
uint64_t *ibuf = malloc(sbd.bsize * GFS2_NBBY * sizeof(uint64_t)); for (i = 0; i < rgd->ri.ri_length; i++) { m = lgfs2_bm_scan(rgd, i, ibuf, GFS2_BLKST_DINODE); for (j = 0; j < m; j++) { *blk = ibuf[j]; bhp = bread(&sbd, *blk); found = (*blk > startblk) && !gfs2_check_meta(bhp, mtype); brelse(bhp); if (found) { free(ibuf); return 0; } } } free(ibuf); return -1; } /* ------------------------------------------------------------------------ */ /* Find next "metadata in use" block AFTER a given point in the fs */ /* */ /* This version does its magic by searching the bitmaps of the RG. After */ /* all, if we're searching for a dinode, we want a real allocated inode, */ /* not just some block that used to be an inode in a previous incarnation. */ /* ------------------------------------------------------------------------ */ static uint64_t find_metablockoftype_rg(uint64_t startblk, int metatype, int print) { struct osi_node *next = NULL; uint64_t blk, errblk; int first = 1, found = 0; struct rgrp_tree *rgd; struct gfs2_rindex *ri; blk = 0; /* Skip the rgs prior to the block we've been given */ for (next = osi_first(&sbd.rgtree); next; next = osi_next(next)) { rgd = (struct rgrp_tree *)next; ri = &rgd->ri; if (first && startblk <= ri->ri_data0) { startblk = ri->ri_data0; break; } else if (ri->ri_addr <= startblk && startblk < ri->ri_data0 + ri->ri_data) break; else rgd = NULL; first = 0; } if (!rgd) { if (print) printf("0\n"); gfs2_rgrp_free(&sbd.rgtree); if (print) exit(-1); } for (; !found && next; next = osi_next(next)){ rgd = (struct rgrp_tree *)next; errblk = gfs2_rgrp_read(&sbd, rgd); if (errblk) continue; found = !find_rg_metatype(rgd, &blk, startblk, metatype); if (found) break; gfs2_rgrp_relse(rgd); } if (!found) blk = 0; if (print) { if (dmode == HEX_MODE) printf("0x%llx\n", (unsigned long long)blk); else printf("%llu\n", (unsigned long long)blk); } gfs2_rgrp_free(&sbd.rgtree); if (print) exit(0); return blk; } /* 
------------------------------------------------------------------------ */ /* Find next metadata block AFTER a given point in the fs */ /* ------------------------------------------------------------------------ */ static uint64_t find_metablockoftype(const char *strtype, int print) { int mtype = 0; uint64_t startblk, blk = 0; if (print) startblk = blockstack[blockhist % BLOCK_STACK_SIZE].block; else startblk = block; for (mtype = GFS2_METATYPE_NONE; mtype <= GFS2_METATYPE_QC; mtype++) if (!strcasecmp(strtype, mtypes[mtype])) break; if (!strcmp(strtype, "dinode")) mtype = GFS2_METATYPE_DI; if (mtype >= GFS2_METATYPE_NONE && mtype <= GFS2_METATYPE_RB) blk = find_metablockoftype_slow(startblk, mtype, print); else if (mtype >= GFS2_METATYPE_DI && mtype <= GFS2_METATYPE_QC) blk = find_metablockoftype_rg(startblk, mtype, print); else if (print) { fprintf(stderr, "Error: metadata type not " "specified: must be one of:\n"); fprintf(stderr, "sb rg rb di in lf jd lh ld" " ea ed lb 13 qc\n"); gfs2_rgrp_free(&sbd.rgtree); exit(-1); } return blk; } /* ------------------------------------------------------------------------ */ /* Check if the word is a keyword such as "sb" or "rindex" */ /* Returns: block number if it is, else 0 */ /* ------------------------------------------------------------------------ */ uint64_t check_keywords(const char *kword) { unsigned long long blk = 0; if (!strcmp(kword, "sb") ||!strcmp(kword, "superblock")) blk = 0x10 * (4096 / sbd.bsize); /* superblock */ else if (!strcmp(kword, "root") || !strcmp(kword, "rootdir")) blk = sbd.sd_sb.sb_root_dir.no_addr; else if (!strcmp(kword, "master")) { if (sbd.gfs1) fprintf(stderr, "This is GFS1; there's no master directory.\n"); else if (!sbd.sd_sb.sb_master_dir.no_addr) { fprintf(stderr, "GFS2 master directory not found on %s\n", sbd.device_name); exit(-1); } else blk = sbd.sd_sb.sb_master_dir.no_addr; } else if (!strcmp(kword, "jindex")) { if (sbd.gfs1) blk = sbd1->sb_jindex_di.no_addr; else blk = 
masterblock("jindex"); /* journal index */ } else if (!sbd.gfs1 && !strcmp(kword, "per_node")) blk = masterblock("per_node"); else if (!sbd.gfs1 && !strcmp(kword, "inum")) blk = masterblock("inum"); else if (!strcmp(kword, "statfs")) { if (sbd.gfs1) blk = gfs1_license_di.no_addr; else blk = masterblock("statfs"); } else if (!strcmp(kword, "rindex") || !strcmp(kword, "rgindex")) { if (sbd.gfs1) blk = sbd1->sb_rindex_di.no_addr; else blk = masterblock("rindex"); } else if (!strcmp(kword, "rgs")) { blk = RGLIST_DUMMY_BLOCK; } else if (!strcmp(kword, "quota")) { if (sbd.gfs1) blk = gfs1_quota_di.no_addr; else blk = masterblock("quota"); } else if (!strncmp(kword, "rg ", 3)) { int rgnum = 0; rgnum = atoi(kword + 3); blk = get_rg_addr(rgnum); } else if (!strncmp(kword, "journal", 7) && isdigit(kword[7])) { uint64_t j_size; blk = find_journal_block(kword, &j_size); } else if (kword[0]=='/') /* search */ blk = find_metablockoftype(&kword[1], 0); else if (kword[0]=='0' && kword[1]=='x') /* hex addr */ sscanf(kword, "%llx", &blk);/* retrieve in hex */ else sscanf(kword, "%llu", &blk); /* retrieve decimal */ return blk; } /* ------------------------------------------------------------------------ */ /* goto_block - go to a desired block entered by the user */ /* ------------------------------------------------------------------------ */ static uint64_t goto_block(void) { char string[256]; int ch, delta; memset(string, 0, sizeof(string)); sprintf(string,"%lld", (long long)block); if (bobgets(string, 1, 7, 16, &ch)) { if (isalnum(string[0]) || string[0] == '/') temp_blk = check_keywords(string); else if (string[0] == '+' || string[0] == '-') { if (string[1] == '0' && string[2] == 'x') sscanf(string, "%x", &delta); else sscanf(string, "%d", &delta); temp_blk = block + delta; } if (temp_blk == RGLIST_DUMMY_BLOCK || temp_blk < max_block) { offset = 0; block = temp_blk; push_block(block); } } return block; } /* 
------------------------------------------------------------------------ */ /* init_colors */ /* ------------------------------------------------------------------------ */ static void init_colors(void) { if (color_scheme) { init_pair(COLOR_TITLE, COLOR_BLACK, COLOR_CYAN); init_pair(COLOR_NORMAL, COLOR_WHITE, COLOR_BLACK); init_pair(COLOR_INVERSE, COLOR_BLACK, COLOR_WHITE); init_pair(COLOR_SPECIAL, COLOR_RED, COLOR_BLACK); init_pair(COLOR_HIGHLIGHT, COLOR_GREEN, COLOR_BLACK); init_pair(COLOR_OFFSETS, COLOR_CYAN, COLOR_BLACK); init_pair(COLOR_CONTENTS, COLOR_YELLOW, COLOR_BLACK); } else { init_pair(COLOR_TITLE, COLOR_BLACK, COLOR_CYAN); init_pair(COLOR_NORMAL, COLOR_BLACK, COLOR_WHITE); init_pair(COLOR_INVERSE, COLOR_WHITE, COLOR_BLACK); init_pair(COLOR_SPECIAL, COLOR_MAGENTA, COLOR_WHITE); init_pair(COLOR_HIGHLIGHT, COLOR_RED, COLOR_WHITE); /*cursor*/ init_pair(COLOR_OFFSETS, COLOR_CYAN, COLOR_WHITE); init_pair(COLOR_CONTENTS, COLOR_BLUE, COLOR_WHITE); } } /* ------------------------------------------------------------------------ */ /* hex_edit - Allow the user to edit the page by entering hex digits */ /* ------------------------------------------------------------------------ */ static void hex_edit(int *exitch) { int left_off; int ch; left_off = ((block * sbd.bsize) < 0xffffffff) ? 
9 : 17; /* 8 and 16 char addresses on screen */ if (bobgets(estring, edit_row[HEX_MODE] + 3, (edit_col[HEX_MODE] * 2) + (edit_col[HEX_MODE] / 4) + left_off, 2, exitch)) { if (strstr(edit_fmt,"X") || strstr(edit_fmt,"x")) { int hexoffset; int i, sl = strlen(estring); for (i = 0; i < sl; i+=2) { hexoffset = (edit_row[HEX_MODE] * 16) + edit_col[HEX_MODE] + (i / 2); ch = 0x00; if (isdigit(estring[i])) ch = (estring[i] - '0') * 0x10; else if (estring[i] >= 'a' && estring[i] <= 'f') ch = (estring[i]-'a' + 0x0a)*0x10; else if (estring[i] >= 'A' && estring[i] <= 'F') ch = (estring[i] - 'A' + 0x0a) * 0x10; if (isdigit(estring[i+1])) ch += (estring[i+1] - '0'); else if (estring[i+1] >= 'a' && estring[i+1] <= 'f') ch += (estring[i+1] - 'a' + 0x0a); else if (estring[i+1] >= 'A' && estring[i+1] <= 'F') ch += (estring[i+1] - 'A' + 0x0a); bh->b_data[offset + hexoffset] = ch; } lseek(sbd.device_fd, dev_offset, SEEK_SET); if (write(sbd.device_fd, bh->b_data, sbd.bsize) != sbd.bsize) { fprintf(stderr, "write error: %s from %s:%d: " "offset %lld (0x%llx)\n", strerror(errno), __FUNCTION__, __LINE__, (unsigned long long)dev_offset, (unsigned long long)dev_offset); exit(-1); } fsync(sbd.device_fd); } } } /* ------------------------------------------------------------------------ */ /* page up */ /* ------------------------------------------------------------------------ */ static void pageup(void) { if (dmode == EXTENDED_MODE) { if (edit_row[dmode] - (dsplines / lines_per_row[dmode]) > 0) edit_row[dmode] -= (dsplines / lines_per_row[dmode]); else edit_row[dmode] = 0; if (start_row[dmode] - (dsplines / lines_per_row[dmode]) > 0) start_row[dmode] -= (dsplines / lines_per_row[dmode]); else start_row[dmode] = 0; } else { start_row[dmode] = edit_row[dmode] = 0; if (dmode == GFS2_MODE || offset==0) { block--; if (dmode == HEX_MODE) offset = (sbd.bsize % screen_chunk_size) > 0 ? 
screen_chunk_size * (sbd.bsize / screen_chunk_size) : sbd.bsize - screen_chunk_size; else offset = 0; } else offset -= screen_chunk_size; } } /* ------------------------------------------------------------------------ */ /* page down */ /* ------------------------------------------------------------------------ */ static void pagedn(void) { if (dmode == EXTENDED_MODE) { if ((edit_row[dmode] + dsplines) / lines_per_row[dmode] + 1 <= end_row[dmode]) { start_row[dmode] += dsplines / lines_per_row[dmode]; edit_row[dmode] += dsplines / lines_per_row[dmode]; } else { edit_row[dmode] = end_row[dmode] - 1; while (edit_row[dmode] - start_row[dmode] + 1 > last_entry_onscreen[dmode]) start_row[dmode]++; } } else { start_row[dmode] = edit_row[dmode] = 0; if (dmode == GFS2_MODE || offset + screen_chunk_size >= sbd.bsize) { block++; offset = 0; } else offset += screen_chunk_size; } } /* ------------------------------------------------------------------------ */ /* jump - jump to the address the cursor is on */ /* ------------------------------------------------------------------------ */ static void jump(void) { if (dmode == HEX_MODE) { unsigned int col2; uint64_t *b; if (edit_row[dmode] >= 0) { col2 = edit_col[dmode] & 0x08;/* thus 0-7->0, 8-15->8 */ b = (uint64_t *)&bh->b_data[edit_row[dmode]*16 + offset + col2]; temp_blk=be64_to_cpu(*b); } } else sscanf(estring, "%"SCNx64, &temp_blk);/* retrieve in hex */ if (temp_blk < max_block) { /* if the block number is valid */ int i; offset = 0; push_block(temp_blk); block = temp_blk; for (i = 0; i < DMODES; i++) { start_row[i] = end_row[i] = edit_row[i] = 0; edit_col[i] = 0; } } } /* ------------------------------------------------------------------------ */ /* print block type */ /* ------------------------------------------------------------------------ */ static void print_block_type(uint64_t tblock, int type, const char *additional) { if (type <= GFS2_METATYPE_QC) printf("%d (Block %lld is type %d: %s%s)\n", type, (unsigned long 
long)tblock, type, block_type_str[type], additional); else printf("%d (Block %lld is type %d: unknown%s)\n", type, (unsigned long long)tblock, type, additional); } /* ------------------------------------------------------------------------ */ /* find_print block type */ /* ------------------------------------------------------------------------ */ static void find_print_block_type(void) { uint64_t tblock; struct gfs2_buffer_head *lbh; int type; tblock = blockstack[blockhist % BLOCK_STACK_SIZE].block; lbh = bread(&sbd, tblock); type = get_block_type(lbh); print_block_type(tblock, type, ""); brelse(lbh); gfs2_rgrp_free(&sbd.rgtree); exit(0); } /* ------------------------------------------------------------------------ */ /* Find and print the resource group associated with a given block */ /* ------------------------------------------------------------------------ */ static void find_print_block_rg(int bitmap) { uint64_t rblock, rgblock; int i; struct rgrp_tree *rgd; rblock = blockstack[blockhist % BLOCK_STACK_SIZE].block; if (rblock == sbd.sb_addr) printf("0 (the superblock is not in the bitmap)\n"); else { rgd = gfs2_blk2rgrpd(&sbd, rblock); if (rgd) { rgblock = rgd->ri.ri_addr; if (bitmap) { struct gfs2_bitmap *bits = NULL; for (i = 0; i < rgd->ri.ri_length; i++) { bits = &(rgd->bits[i]); if (rblock - rgd->ri.ri_data0 < ((bits->bi_start + bits->bi_len) * GFS2_NBBY)) { break; } } if (i < rgd->ri.ri_length) rgblock += i; } if (dmode == HEX_MODE) printf("0x%llx\n",(unsigned long long)rgblock); else printf("%llu\n", (unsigned long long)rgblock); } else { printf("-1 (block invalid or part of an rgrp).\n"); } } gfs2_rgrp_free(&sbd.rgtree); exit(0); } /* ------------------------------------------------------------------------ */ /* find/change/print block allocation (what the bitmap says about block) */ /* ------------------------------------------------------------------------ */ static void find_change_block_alloc(int *newval) { uint64_t ablock; int type; struct 
rgrp_tree *rgd; if (newval && (*newval < GFS2_BLKST_FREE || *newval > GFS2_BLKST_DINODE)) { int i; printf("Error: value %d is not valid.\nValid values are:\n", *newval); for (i = GFS2_BLKST_FREE; i <= GFS2_BLKST_DINODE; i++) printf("%d - %s\n", i, allocdesc[sbd.gfs1][i]); gfs2_rgrp_free(&sbd.rgtree); exit(-1); } ablock = blockstack[blockhist % BLOCK_STACK_SIZE].block; if (ablock == sbd.sb_addr) printf("3 (the superblock is not in the bitmap)\n"); else { rgd = gfs2_blk2rgrpd(&sbd, ablock); if (rgd) { gfs2_rgrp_read(&sbd, rgd); if (newval) { if (gfs2_set_bitmap(&sbd, ablock, *newval)) printf("-1 (block invalid or part of an rgrp).\n"); else printf("%d\n", *newval); } else { type = lgfs2_get_bitmap(&sbd, ablock, rgd); if (type < 0) { printf("-1 (block invalid or part of " "an rgrp).\n"); exit(-1); } printf("%d (%s)\n", type, allocdesc[sbd.gfs1][type]); } gfs2_rgrp_relse(rgd); } else { gfs2_rgrp_free(&sbd.rgtree); printf("-1 (block invalid or part of an rgrp).\n"); exit(-1); } } gfs2_rgrp_free(&sbd.rgtree); if (newval) fsync(sbd.device_fd); exit(0); } /* ------------------------------------------------------------------------ */ /* process request to print a certain field from a previously pushed block */ /* ------------------------------------------------------------------------ */ static void process_field(const char *field, const char *nstr) { uint64_t fblock; struct gfs2_buffer_head *rbh; int type; struct gfs2_rgrp rg; struct gfs2_leaf leaf; struct gfs2_sb lsb; struct gfs2_log_header lh; struct gfs2_log_descriptor ld; struct gfs2_quota_change qc; int setval = 0, setstring = 0; uint64_t newval = 0; if (nstr[0] == '/') { setval = 0; } else if (nstr[0] == '0' && nstr[1] == 'x') { sscanf(nstr, "%"SCNx64, &newval); setval = 1; } else { newval = (uint64_t)atoll(nstr); setval = 1; } if (setval && newval == 0 && nstr[0] != '0') setstring = 1; fblock = blockstack[blockhist % BLOCK_STACK_SIZE].block; rbh = bread(&sbd, fblock); type = get_block_type(rbh); switch (type) { case 
GFS2_METATYPE_SB: gfs2_sb_in(&lsb, rbh); if (setval) { if (setstring) gfs2_sb_assigns(&lsb, field, nstr); else gfs2_sb_assignval(&lsb, field, newval); gfs2_sb_out(&lsb, rbh); if (!termlines) gfs2_sb_printval(&lsb, field); } else { if (!termlines && gfs2_sb_printval(&lsb, field)) printf("Field '%s' not found.\n", field); } break; case GFS2_METATYPE_RG: gfs2_rgrp_in(&rg, rbh); if (setval) { gfs2_rgrp_assignval(&rg, field, newval); gfs2_rgrp_out_bh(&rg, rbh); if (!termlines) gfs2_rgrp_printval(&rg, field); } else { if (!termlines && gfs2_rgrp_printval(&rg, field)) printf("Field '%s' not found.\n", field); } break; case GFS2_METATYPE_RB: if (!termlines) print_block_type(fblock, type, " which is not implemented"); break; case GFS2_METATYPE_DI: gfs2_dinode_in(&di, rbh); if (setval) { gfs2_dinode_assignval(&di, field, newval); gfs2_dinode_out(&di, rbh); if (!termlines) gfs2_dinode_printval(&di, field); } else { if (!termlines && gfs2_dinode_printval(&di, field)) printf("Field '%s' not found.\n", field); } break; case GFS2_METATYPE_IN: if (!setval && !setstring) print_block_type(fblock, type, " which is not implemented"); break; case GFS2_METATYPE_LF: gfs2_leaf_in(&leaf, rbh); if (setval) { if (setstring) gfs2_leaf_assigns(&leaf, field, nstr); else gfs2_leaf_assignval(&leaf, field, newval); gfs2_leaf_out(&leaf, rbh); if (!termlines) gfs2_leaf_printval(&leaf, field); } else { if (!termlines && gfs2_leaf_printval(&leaf, field)) printf("Field '%s' not found.\n", field); } break; case GFS2_METATYPE_LH: gfs2_log_header_in(&lh, rbh); if (setval) { gfs2_lh_assignval(&lh, field, newval); gfs2_log_header_out(&lh, rbh); if (!termlines) gfs2_lh_printval(&lh, field); } else { if (!termlines && gfs2_lh_printval(&lh, field)) printf("Field '%s' not found.\n", field); } break; case GFS2_METATYPE_LD: gfs2_log_descriptor_in(&ld, rbh); if (setval) { if (setstring) gfs2_ld_assigns(&ld, field, nstr); else gfs2_ld_assignval(&ld, field, newval); gfs2_log_descriptor_out(&ld, rbh); if (!termlines) 
gfs2_ld_printval(&ld, field); } else { if (!termlines && gfs2_ld_printval(&ld, field)) printf("Field '%s' not found.\n", field); } break; case GFS2_METATYPE_QC: gfs2_quota_change_in(&qc, rbh); if (setval) { gfs2_qc_assignval(&qc, field, newval); gfs2_quota_change_out(&qc, rbh); if (!termlines) gfs2_qc_printval(&qc, field); } else { if (!termlines && gfs2_qc_printval(&qc, field)) printf("Field '%s' not found.\n", field); } break; case GFS2_METATYPE_JD: /* journaled data */ case GFS2_METATYPE_EA: /* extended attribute */ case GFS2_METATYPE_ED: /* extended attribute */ case GFS2_METATYPE_LB: default: if (!termlines) print_block_type(fblock, type, " which is not implemented"); break; } brelse(rbh); fsync(sbd.device_fd); exit(0); } /* ------------------------------------------------------------------------ */ /* interactive_mode - accept keystrokes from user and display structures */ /* ------------------------------------------------------------------------ */ static void interactive_mode(void) { int ch = 0, Quit; if ((wind = initscr()) == NULL) { fprintf(stderr, "Error: unable to initialize screen."); eol(0); exit(-1); } getmaxyx(stdscr, termlines, termcols); termlines--; /* Do our initial screen stuff: */ clear(); /* don't use Erase */ start_color(); noecho(); keypad(stdscr, TRUE); raw(); curs_set(0); init_colors(); /* Accept keystrokes and act on them accordingly */ Quit = FALSE; editing = FALSE; while (!Quit) { display(FALSE); if (editing) { if (edit_row[dmode] == -1) block = goto_block(); else { if (dmode == HEX_MODE) hex_edit(&ch); else if (dmode == GFS2_MODE) { bobgets(estring, edit_row[dmode]+4, 24, 10, &ch); process_field(efield, estring); block_in_mem = -1; } else bobgets(estring, edit_row[dmode]+6, 14, edit_size[dmode], &ch); } } else while ((ch=getch()) == 0); // wait for input switch (ch) { /* --------------------------------------------------------- */ /* escape or 'q' */ /* --------------------------------------------------------- */ case 0x1b: case 
0x03: case 'q': if (editing) editing = FALSE; else Quit=TRUE; break; /* --------------------------------------------------------- */ /* home - return to the superblock */ /* --------------------------------------------------------- */ case KEY_HOME: if (dmode == EXTENDED_MODE) { start_row[dmode] = end_row[dmode] = 0; edit_row[dmode] = 0; } else { block = 0x10 * (4096 / sbd.bsize); push_block(block); offset = 0; } break; /* --------------------------------------------------------- */ /* backspace - return to the previous block on the stack */ /* --------------------------------------------------------- */ case KEY_BACKSPACE: case 0x7f: block = pop_block(); offset = 0; break; /* --------------------------------------------------------- */ /* space - go down the block stack (opposite of backspace) */ /* --------------------------------------------------------- */ case ' ': blockhist++; block = blockstack[blockhist % BLOCK_STACK_SIZE].block; offset = 0; break; /* --------------------------------------------------------- */ /* arrow up */ /* --------------------------------------------------------- */ case KEY_UP: case '-': if (dmode == EXTENDED_MODE) { if (edit_row[dmode] > 0) edit_row[dmode]--; if (edit_row[dmode] < start_row[dmode]) start_row[dmode] = edit_row[dmode]; } else { if (edit_row[dmode] >= 0) edit_row[dmode]--; } break; /* --------------------------------------------------------- */ /* arrow down */ /* --------------------------------------------------------- */ case KEY_DOWN: case '+': if (dmode == EXTENDED_MODE) { if (edit_row[dmode] + 1 < end_row[dmode]) { if (edit_row[dmode] - start_row[dmode] + 1 > last_entry_onscreen[dmode]) start_row[dmode]++; edit_row[dmode]++; } } else { if (edit_row[dmode] < last_entry_onscreen[dmode]) edit_row[dmode]++; } break; /* --------------------------------------------------------- */ /* arrow left */ /* --------------------------------------------------------- */ case KEY_LEFT: if (dmode == HEX_MODE) { if (edit_col[dmode] 
> 0) edit_col[dmode]--; else edit_col[dmode] = 15; } break; /* --------------------------------------------------------- */ /* arrow right */ /* --------------------------------------------------------- */ case KEY_RIGHT: if (dmode == HEX_MODE) { if (edit_col[dmode] < 15) edit_col[dmode]++; else edit_col[dmode] = 0; } break; /* --------------------------------------------------------- */ /* m - change display mode key */ /* --------------------------------------------------------- */ case 'm': dmode = ((dmode + 1) % DMODES); break; /* --------------------------------------------------------- */ /* J - Jump to highlighted block number */ /* --------------------------------------------------------- */ case 'j': jump(); break; /* --------------------------------------------------------- */ /* g - goto block */ /* --------------------------------------------------------- */ case 'g': block = goto_block(); break; /* --------------------------------------------------------- */ /* h - help key */ /* --------------------------------------------------------- */ case 'h': print_usage(); break; /* --------------------------------------------------------- */ /* e - change to extended mode */ /* --------------------------------------------------------- */ case 'e': dmode = EXTENDED_MODE; break; /* --------------------------------------------------------- */ /* b - Back one 4K block */ /* --------------------------------------------------------- */ case 'b': start_row[dmode] = end_row[dmode] = edit_row[dmode] = 0; if (block > 0) block--; offset = 0; break; /* --------------------------------------------------------- */ /* c - Change color scheme */ /* --------------------------------------------------------- */ case 'c': color_scheme = !color_scheme; init_colors(); break; /* --------------------------------------------------------- */ /* page up key */ /* --------------------------------------------------------- */ case 0x19: // ctrl-y for vt100 case KEY_PPAGE: // PgUp case 
0x15: // ctrl-u for vi compat. case 0x02: // ctrl-b for less compat. pageup(); break; /* --------------------------------------------------------- */ /* end - Jump to the end of the list */ /* --------------------------------------------------------- */ case 0x168: if (dmode == EXTENDED_MODE) { int ents_per_screen = dsplines / lines_per_row[dmode]; edit_row[dmode] = end_row[dmode] - 1; if ((edit_row[dmode] - ents_per_screen)+1 > 0) start_row[dmode] = edit_row[dmode] - ents_per_screen + 1; else start_row[dmode] = 0; } /* TODO: Make end key work for other display modes. */ break; /* --------------------------------------------------------- */ /* f - Forward one 4K block */ /* --------------------------------------------------------- */ case 'f': start_row[dmode]=end_row[dmode]=edit_row[dmode] = 0; lines_per_row[dmode] = 1; block++; offset = 0; break; /* --------------------------------------------------------- */ /* page down key */ /* --------------------------------------------------------- */ case 0x16: // ctrl-v for vt100 case KEY_NPAGE: // PgDown case 0x04: // ctrl-d for vi compat. 
pagedn(); break; /* --------------------------------------------------------- */ /* enter key - change a value */ /* --------------------------------------------------------- */ case KEY_ENTER: case('\n'): case('\r'): editing = !editing; break; case KEY_RESIZE: getmaxyx(stdscr, termlines, termcols); termlines--; break; default: move(termlines - 1, 0); printw("Keystroke not understood: 0x%03x",ch); refresh(); usleep(50000); break; } /* switch */ } /* while !Quit */ Erase(); refresh(); endwin(); }/* interactive_mode */ /* ------------------------------------------------------------------------ */ /* gfs_log_header_in - read in a gfs1-style log header */ /* ------------------------------------------------------------------------ */ void gfs_log_header_in(struct gfs_log_header *head, struct gfs2_buffer_head *lbh) { struct gfs_log_header *str = lbh->iov.iov_base; gfs2_meta_header_in(&head->lh_header, lbh); head->lh_flags = be32_to_cpu(str->lh_flags); head->lh_pad = be32_to_cpu(str->lh_pad); head->lh_first = be64_to_cpu(str->lh_first); head->lh_sequence = be64_to_cpu(str->lh_sequence); head->lh_tail = be64_to_cpu(str->lh_tail); head->lh_last_dump = be64_to_cpu(str->lh_last_dump); memcpy(head->lh_reserved, str->lh_reserved, 64); } /* ------------------------------------------------------------------------ */ /* gfs_log_header_print - print a gfs1-style log header */ /* ------------------------------------------------------------------------ */ void gfs_log_header_print(struct gfs_log_header *lh) { gfs2_meta_header_print(&lh->lh_header); pv(lh, lh_flags, "%u", "0x%.8x"); pv(lh, lh_pad, "%u", "%x"); pv((unsigned long long)lh, lh_first, "%llu", "%llx"); pv((unsigned long long)lh, lh_sequence, "%llu", "%llx"); pv((unsigned long long)lh, lh_tail, "%llu", "%llx"); pv((unsigned long long)lh, lh_last_dump, "%llu", "%llx"); } /* ------------------------------------------------------------------------ */ /* print_ld_blocks - print all blocks given in a log descriptor */ /* returns: 
the number of block numbers it printed */ /* ------------------------------------------------------------------------ */ static int print_ld_blocks(const uint64_t *b, const char *end, int start_line) { int bcount = 0, i = 0; static char str[256]; while (*b && (char *)b < end) { if (!termlines || (print_entry_ndx >= start_row[dmode] && ((print_entry_ndx - start_row[dmode])+1) * lines_per_row[dmode] <= termlines - start_line - 2)) { if (i && i % 4 == 0) { eol(0); print_gfs2(" "); } i++; sprintf(str, "0x%llx", (unsigned long long)be64_to_cpu(*b)); print_gfs2("%-18.18s ", str); bcount++; } b++; if (sbd.gfs1) b++; } eol(0); return bcount; } /* ------------------------------------------------------------------------ */ /* fsck_readi - same as libgfs2's gfs2_readi, but sets absolute block # */ /* of the first bit of data read. */ /* ------------------------------------------------------------------------ */ static int fsck_readi(struct gfs2_inode *ip, void *rbuf, uint64_t roffset, unsigned int size, uint64_t *abs_block) { struct gfs2_sbd *sdp = ip->i_sbd; struct gfs2_buffer_head *lbh; uint64_t lblock, dblock; unsigned int o; uint32_t extlen = 0; unsigned int amount; int not_new = 0; int isdir = !!(S_ISDIR(ip->i_di.di_mode)); int copied = 0; *abs_block = 0; if (roffset >= ip->i_di.di_size) return 0; if ((roffset + size) > ip->i_di.di_size) size = ip->i_di.di_size - roffset; if (!size) return 0; if (isdir) { o = roffset % sdp->sd_jbsize; lblock = roffset / sdp->sd_jbsize; } else { lblock = roffset >> sdp->sd_sb.sb_bsize_shift; o = roffset & (sdp->bsize - 1); } if (!ip->i_di.di_height) /* inode_is_stuffed */ o += sizeof(struct gfs2_dinode); else if (isdir) o += sizeof(struct gfs2_meta_header); while (copied < size) { amount = size - copied; if (amount > sdp->bsize - o) amount = sdp->bsize - o; if (!extlen) block_map(ip, lblock, ¬_new, &dblock, &extlen, FALSE); if (dblock) { lbh = bread(sdp, dblock); if (*abs_block == 0) *abs_block = lbh->b_blocknr; dblock++; extlen--; } else 
lbh = NULL; if (lbh) { memcpy(rbuf, lbh->b_data + o, amount); brelse(lbh); } else { memset(rbuf, 0, amount); } copied += amount; lblock++; o = (isdir) ? sizeof(struct gfs2_meta_header) : 0; } return copied; } static void check_journal_wrap(uint64_t seq, uint64_t *highest_seq) { if (seq < *highest_seq) { print_gfs2("------------------------------------------------" "------------------------------------------------"); eol(0); print_gfs2("Journal wrapped here."); eol(0); print_gfs2("------------------------------------------------" "------------------------------------------------"); eol(0); } *highest_seq = seq; } static int is_meta(struct gfs2_buffer_head *lbh) { uint32_t check_magic = ((struct gfs2_meta_header *)(lbh->b_data))->mh_magic; check_magic = be32_to_cpu(check_magic); if (check_magic == GFS2_MAGIC) return 1; return 0; } /* ------------------------------------------------------------------------ */ /* dump_journal - dump a journal file's contents. */ /* ------------------------------------------------------------------------ */ static void dump_journal(const char *journal) { struct gfs2_buffer_head *j_bh = NULL, dummy_bh; uint64_t jblock, j_size, jb, abs_block, saveblk; int error, start_line, journal_num; struct gfs2_inode *j_inode = NULL; int ld_blocks = 0; uint64_t highest_seq = 0; char *jbuf = NULL; start_line = line; lines_per_row[dmode] = 1; error = 0; journal_num = atoi(journal + 7); print_gfs2("Dumping journal #%d.", journal_num); eol(0); jblock = find_journal_block(journal, &j_size); if (!jblock) return; if (!sbd.gfs1) { j_bh = bread(&sbd, jblock); j_inode = lgfs2_inode_get(&sbd, j_bh); if (j_inode == NULL) { fprintf(stderr, "Out of memory\n"); exit(-1); } jbuf = malloc(sbd.bsize); if (jbuf == NULL) { fprintf(stderr, "Out of memory\n"); exit(-1); } } for (jb = 0; jb < j_size; jb += (sbd.gfs1 ? 
1:sbd.bsize)) { if (sbd.gfs1) { if (j_bh) brelse(j_bh); j_bh = bread(&sbd, jblock + jb); abs_block = jblock + jb; dummy_bh.b_data = j_bh->b_data; } else { error = fsck_readi(j_inode, (void *)jbuf, jb, sbd.bsize, &abs_block); if (!error) /* end of file */ break; dummy_bh.b_data = jbuf; } if (get_block_type(&dummy_bh) == GFS2_METATYPE_LD) { uint64_t *b; struct gfs2_log_descriptor ld; int ltndx; uint32_t logtypes[2][6] = { {GFS2_LOG_DESC_METADATA, GFS2_LOG_DESC_REVOKE, GFS2_LOG_DESC_JDATA, 0, 0, 0}, {GFS_LOG_DESC_METADATA, GFS_LOG_DESC_IUL, GFS_LOG_DESC_IDA, GFS_LOG_DESC_Q, GFS_LOG_DESC_LAST, 0}}; const char *logtypestr[2][6] = { {"Metadata", "Revoke", "Jdata", "Unknown", "Unknown", "Unknown"}, {"Metadata", "Unlinked inode", "Dealloc inode", "Quota", "Final Entry", "Unknown"}}; print_gfs2("0x%llx (j+%4llx): Log descriptor, ", abs_block, jb / (sbd.gfs1 ? 1 : sbd.bsize)); gfs2_log_descriptor_in(&ld, &dummy_bh); print_gfs2("type %d ", ld.ld_type); for (ltndx = 0;; ltndx++) { if (ld.ld_type == logtypes[sbd.gfs1][ltndx] || logtypes[sbd.gfs1][ltndx] == 0) break; } print_gfs2("(%s) ", logtypestr[sbd.gfs1][ltndx]); print_gfs2("len:%u, data1: %u", ld.ld_length, ld.ld_data1); eol(0); print_gfs2(" "); if (sbd.gfs1) b = (uint64_t *)(dummy_bh.b_data + sizeof(struct gfs_log_descriptor)); else b = (uint64_t *)(dummy_bh.b_data + sizeof(struct gfs2_log_descriptor)); ld_blocks = ld.ld_data1; ld_blocks -= print_ld_blocks(b, (dummy_bh.b_data + sbd.bsize), start_line); } else if (get_block_type(&dummy_bh) == GFS2_METATYPE_LH) { struct gfs2_log_header lh; struct gfs_log_header lh1; if (sbd.gfs1) { gfs_log_header_in(&lh1, &dummy_bh); check_journal_wrap(lh1.lh_sequence, &highest_seq); print_gfs2("0x%llx (j+%4llx): Log header: " "Flags:%x, Seq: 0x%x, " "1st: 0x%x, tail: 0x%x, " "last: 0x%x", abs_block, jb, lh1.lh_flags, lh1.lh_sequence, lh1.lh_first, lh1.lh_tail, lh1.lh_last_dump); } else { gfs2_log_header_in(&lh, &dummy_bh); check_journal_wrap(lh.lh_sequence, &highest_seq); 
print_gfs2("0x%llx (j+%4llx): Log header: Seq" ": 0x%x, tail: 0x%x, blk: 0x%x", abs_block, jb / sbd.bsize, lh.lh_sequence, lh.lh_tail, lh.lh_blkno); } eol(0); } else if (sbd.gfs1 && ld_blocks > 0) { print_gfs2("0x%llx (j+%4llx): GFS log descriptor" " continuation block", abs_block, jb); eol(0); print_gfs2(" "); ld_blocks -= print_ld_blocks((uint64_t *)dummy_bh.b_data, (dummy_bh.b_data + sbd.bsize), start_line); } else if (details && is_meta(&dummy_bh)) { saveblk = block; block = abs_block; display(0); block = saveblk; } } inode_put(&j_inode); brelse(j_bh); blockhist = -1; /* So we don't print anything else */ free(jbuf); } /* ------------------------------------------------------------------------ */ /* usage - print command line usage */ /* ------------------------------------------------------------------------ */ static void usage(void) { fprintf(stderr,"\nFormat is: gfs2_edit [-c 1] [-V] [-x] [-h] [identify] [-z <0-9>] [-p structures|blocks][blocktype][blockalloc [val]][blockbits][blockrg][rgcount][rgflags][rgbitmaps][find sb|rg|rb|di|in|lf|jd|lh|ld|ea|ed|lb|13|qc][field [val]] /dev/device\n\n"); fprintf(stderr,"If only the device is specified, it enters into hexedit mode.\n"); fprintf(stderr,"identify - prints out only the block type, not the details.\n"); fprintf(stderr,"printsavedmeta - prints out the saved metadata blocks from a savemeta file.\n"); fprintf(stderr,"savemeta - save off your metadata for analysis and debugging.\n"); fprintf(stderr," (The intelligent way: assume bitmap is correct).\n"); fprintf(stderr,"savemetaslow - save off your metadata for analysis and debugging. 
The SLOW way (block by block).\n"); fprintf(stderr,"savergs - save off only the resource group information (rindex and rgs).\n"); fprintf(stderr,"restoremeta - restore metadata for debugging (DANGEROUS).\n"); fprintf(stderr,"rgcount - print how many RGs in the file system.\n"); fprintf(stderr,"rgflags rgnum [new flags] - print or modify flags for rg #rgnum (0 - X)\n"); fprintf(stderr,"rgbitmaps - print out the bitmaps for rgrp " "rgnum.\n"); fprintf(stderr,"-V prints version number.\n"); fprintf(stderr,"-c 1 selects alternate color scheme 1\n"); fprintf(stderr,"-d prints details (for printing journals)\n"); fprintf(stderr,"-p prints GFS2 structures or blocks to stdout.\n"); fprintf(stderr," sb - prints the superblock.\n"); fprintf(stderr," size - prints the filesystem size.\n"); fprintf(stderr," master - prints the master directory.\n"); fprintf(stderr," root - prints the root directory.\n"); fprintf(stderr," jindex - prints the journal index directory.\n"); fprintf(stderr," per_node - prints the per_node directory.\n"); fprintf(stderr," inum - prints the inum file.\n"); fprintf(stderr," statfs - prints the statfs file.\n"); fprintf(stderr," rindex - prints the rindex file.\n"); fprintf(stderr," rg X - print resource group X.\n"); fprintf(stderr," rgs - prints all the resource groups (rgs).\n"); fprintf(stderr," quota - prints the quota file.\n"); fprintf(stderr," 0x1234 - prints the specified block\n"); fprintf(stderr,"-p blocktype - prints the type " "of the specified block\n"); fprintf(stderr,"-p blockrg - prints the resource group " "block corresponding to the specified block\n"); fprintf(stderr,"-p blockbits - prints the block with " "the bitmap corresponding to the specified block\n"); fprintf(stderr,"-p blockalloc [0|1|2|3] - print or change " "the allocation type of the specified block\n"); fprintf(stderr,"-p field [new_value] - prints or change the " "structure field\n"); fprintf(stderr,"-p find sb|rg|rb|di|in|lf|jd|lh|ld|ea|ed|lb|" "13|qc - find block of 
given type after block \n"); fprintf(stderr," specifies the starting block for search\n"); fprintf(stderr,"-z 1 use gzip compression level 1 for savemeta (default 9)\n"); fprintf(stderr,"-z 0 do not use compression\n"); fprintf(stderr,"-s specifies a starting block such as root, rindex, quota, inum.\n"); fprintf(stderr,"-x print in hexmode.\n"); fprintf(stderr,"-h prints this help.\n\n"); fprintf(stderr,"Examples:\n"); fprintf(stderr," To run in interactive mode:\n"); fprintf(stderr," gfs2_edit /dev/bobs_vg/lvol0\n"); fprintf(stderr," To print out the superblock and master directory:\n"); fprintf(stderr," gfs2_edit -p sb master /dev/bobs_vg/lvol0\n"); fprintf(stderr," To print out the master directory in hex:\n"); fprintf(stderr," gfs2_edit -x -p master /dev/bobs_vg/lvol0\n"); fprintf(stderr," To print out the block-type for block 0x27381:\n"); fprintf(stderr," gfs2_edit identify -p 0x27381 /dev/bobs_vg/lvol0\n"); fprintf(stderr," To print out the fourth Resource Group. (the first R is #0)\n"); fprintf(stderr," gfs2_edit -p rg 3 /dev/sdb1\n"); fprintf(stderr," To print out the metadata type of block 1234\n"); fprintf(stderr," gfs2_edit -p 1234 blocktype /dev/roth_vg/roth_lb\n"); fprintf(stderr," To print out the allocation type of block 2345\n"); fprintf(stderr," gfs2_edit -p 2345 blockalloc /dev/vg/lv\n"); fprintf(stderr," To change the allocation type of block 2345 to a 'free block'\n"); fprintf(stderr," gfs2_edit -p 2345 blockalloc 0 /dev/vg/lv\n"); fprintf(stderr," To print out the file size of the dinode at block 0x118\n"); fprintf(stderr," gfs2_edit -p 0x118 field di_size /dev/roth_vg/roth_lb\n"); fprintf(stderr," To find any dinode higher than the quota file dinode:\n"); fprintf(stderr," gfs2_edit -p quota find di /dev/x/y\n"); fprintf(stderr," To set the Resource Group flags for rg #7 to 3.\n"); fprintf(stderr," gfs2_edit rgflags 7 3 /dev/sdc2\n"); fprintf(stderr," To save off all metadata for /dev/vg/lv without compression:\n"); fprintf(stderr," gfs2_edit 
savemeta -z 0 /dev/vg/lv /tmp/metasave\n"); }/* usage */ /** * getgziplevel - Process the -z parameter to savemeta operations * argv - argv * i - a pointer to the argv index at which to begin processing * The index pointed to by i will be incremented past the -z option if found */ static void getgziplevel(char *argv[], int *i) { char *endptr; (*i)++; if (!strcasecmp(argv[*i], "-z")) { (*i)++; errno = 0; gziplevel = strtol(argv[*i], &endptr, 10); if (errno || endptr == argv[*i] || gziplevel < 0 || gziplevel > 9) { fprintf(stderr, "Compression level out of range: %s\n", argv[*i]); exit(-1); } } else { (*i)--; } } /* ------------------------------------------------------------------------ */ /* parameterpass1 - pre-processing for command-line parameters */ /* ------------------------------------------------------------------------ */ static void parameterpass1(int argc, char *argv[], int i) { if (!strcasecmp(argv[i], "-V")) { printf("%s version %s (built %s %s)\n", argv[0], VERSION, __DATE__, __TIME__); printf("%s\n", REDHAT_COPYRIGHT); exit(0); } else if (!strcasecmp(argv[i], "-h") || !strcasecmp(argv[i], "-help") || !strcasecmp(argv[i], "-usage")) { usage(); exit(0); } else if (!strcasecmp(argv[i], "-c")) { i++; color_scheme = atoi(argv[i]); } else if (!strcasecmp(argv[i], "-p") || !strcasecmp(argv[i], "-print")) { termlines = 0; /* initial value--we'll figure it out later */ dmode = GFS2_MODE; } else if (!strcasecmp(argv[i], "-d") || !strcasecmp(argv[i], "-details")) details = 1; else if (!strcasecmp(argv[i], "savemeta")) termlines = 0; else if (!strcasecmp(argv[i], "savemetaslow")) termlines = 0; else if (!strcasecmp(argv[i], "savergs")) termlines = 0; else if (!strcasecmp(argv[i], "printsavedmeta")) { if (dmode == INIT_MODE) dmode = GFS2_MODE; restoremeta(argv[i+1], argv[i+2], TRUE); } else if (!strcasecmp(argv[i], "restoremeta")) { if (dmode == INIT_MODE) dmode = HEX_MODE; /* hopefully not used */ restoremeta(argv[i+1], argv[i+2], FALSE); } else if 
(!strcmp(argv[i], "rgcount")) termlines = 0; else if (!strcmp(argv[i], "rgflags")) termlines = 0; else if (!strcmp(argv[i], "rg")) termlines = 0; else if (!strcasecmp(argv[i], "-x")) dmode = HEX_MODE; else if (!device[0] && strchr(argv[i],'/')) strcpy(device, argv[i]); } /* ------------------------------------------------------------------------ */ /* process_parameters - process commandline parameters */ /* pass - we make two passes through the parameters; the first pass gathers */ /* normals parameters, device name, etc. The second pass is for */ /* figuring out what structures to print out. */ /* ------------------------------------------------------------------------ */ static void process_parameters(int argc, char *argv[], int pass) { int i; uint64_t keyword_blk; if (argc < 2) { usage(); die("no device specified\n"); } for (i = 1; i < argc; i++) { if (!pass) { /* first pass */ parameterpass1(argc, argv, i); continue; } /* second pass */ if (!strcasecmp(argv[i], "-s")) { i++; if (i >= argc - 1) { printf("Error: starting block not specified " "with -s.\n"); printf("%s -s [starting block | keyword] " "\n", argv[0]); printf("For example: %s -s \"rg 3\" " "/dev/exxon_vg/exxon_lv\n", argv[0]); exit(EXIT_FAILURE); } starting_blk = check_keywords(argv[i]); continue; } if (termlines || strchr(argv[i],'/')) /* if print or slash */ continue; if (!strncmp(argv[i], "journal", 7) && isdigit(argv[i][7])) { dump_journal(argv[i]); continue; } keyword_blk = check_keywords(argv[i]); if (keyword_blk) push_block(keyword_blk); else if (!strcasecmp(argv[i], "-x")) dmode = HEX_MODE; else if (argv[i][0] == '-') /* if it starts with a dash */ ; /* ignore it--meant for pass == 0 */ else if (!strcmp(argv[i], "identify")) identify = TRUE; else if (!strcmp(argv[i], "size")) { printf("Device size: %llu (0x%llx)\n", (unsigned long long)max_block, (unsigned long long)max_block); exit(EXIT_SUCCESS); } else if (!strcmp(argv[i], "rgcount")) rgcount(); else if (!strcmp(argv[i], "field")) { i++; 
if (i >= argc - 1) { printf("Error: field not specified.\n"); printf("Format is: %s -p field " " [newvalue]\n", argv[0]); gfs2_rgrp_free(&sbd.rgtree); exit(EXIT_FAILURE); } process_field(argv[i], argv[i + 1]); } else if (!strcmp(argv[i], "blocktype")) { find_print_block_type(); } else if (!strcmp(argv[i], "blockrg")) { find_print_block_rg(0); } else if (!strcmp(argv[i], "blockbits")) { find_print_block_rg(1); } else if (!strcmp(argv[i], "blockalloc")) { if (isdigit(argv[i + 1][0])) { int newval; if (argv[i + 1][0]=='0' && argv[i + 1][1]=='x') sscanf(argv[i + 1], "%x", &newval); else newval = (uint64_t)atoi(argv[i + 1]); find_change_block_alloc(&newval); } else { find_change_block_alloc(NULL); } } else if (!strcmp(argv[i], "find")) { find_metablockoftype(argv[i + 1], 1); } else if (!strcmp(argv[i], "rgflags")) { int rg, set = FALSE; uint32_t new_flags = 0; i++; if (i >= argc - 1) { printf("Error: rg # not specified.\n"); printf("Format is: %s rgflags rgnum" "[newvalue]\n", argv[0]); gfs2_rgrp_free(&sbd.rgtree); exit(EXIT_FAILURE); } if (argv[i][0]=='0' && argv[i][1]=='x') sscanf(argv[i], "%"SCNx32, &rg); else rg = atoi(argv[i]); i++; if (i < argc - 1 && isdigit(argv[i][0])) { set = TRUE; if (argv[i][0]=='0' && argv[i][1]=='x') sscanf(argv[i], "%"SCNx32, &new_flags); else new_flags = atoi(argv[i]); } set_rgrp_flags(rg, new_flags, set, FALSE); gfs2_rgrp_free(&sbd.rgtree); exit(EXIT_SUCCESS); } else if (!strcmp(argv[i], "rg")) { int rg; i++; if (i >= argc - 1) { printf("Error: rg # not specified.\n"); printf("Format is: %s rg rgnum\n", argv[0]); gfs2_rgrp_free(&sbd.rgtree); exit(EXIT_FAILURE); } rg = atoi(argv[i]); if (!strcasecmp(argv[i + 1], "find")) { temp_blk = get_rg_addr(rg); push_block(temp_blk); } else { set_rgrp_flags(rg, 0, FALSE, TRUE); gfs2_rgrp_free(&sbd.rgtree); exit(EXIT_SUCCESS); } } else if (!strcmp(argv[i], "rgbitmaps")) { int rg, bmap; uint64_t rgblk; struct rgrp_tree *rgd; i++; if (i >= argc - 1) { printf("Error: rg # not specified.\n"); 
printf("Format is: %s rgbitmaps rgnum\n", argv[0]); gfs2_rgrp_free(&sbd.rgtree); exit(EXIT_FAILURE); } rg = atoi(argv[i]); rgblk = get_rg_addr(rg); rgd = gfs2_blk2rgrpd(&sbd, rgblk); if (rgd == NULL) { printf("Error: rg # is invalid.\n"); gfs2_rgrp_free(&sbd.rgtree); exit(EXIT_FAILURE); } for (bmap = 0; bmap < rgd->ri.ri_length; bmap++) push_block(rgblk + bmap); } else if (!strcasecmp(argv[i], "savemeta")) { getgziplevel(argv, &i); savemeta(argv[i+2], 0, gziplevel); } else if (!strcasecmp(argv[i], "savemetaslow")) { getgziplevel(argv, &i); savemeta(argv[i+2], 1, gziplevel); } else if (!strcasecmp(argv[i], "savergs")) { getgziplevel(argv, &i); savemeta(argv[i+2], 2, gziplevel); } else if (isdigit(argv[i][0])) { /* decimal addr */ sscanf(argv[i], "%"SCNd64, &temp_blk); push_block(temp_blk); } else { fprintf(stderr,"I don't know what '%s' means.\n", argv[i]); usage(); exit(EXIT_FAILURE); } } /* for */ }/* process_parameters */ int main(int argc, char *argv[]) { int i, j, fd; indirect = malloc(sizeof(struct iinfo)); if (!indirect) die("Out of memory."); memset(indirect, 0, sizeof(struct iinfo)); memset(start_row, 0, sizeof(start_row)); memset(lines_per_row, 0, sizeof(lines_per_row)); memset(end_row, 0, sizeof(end_row)); memset(edit_row, 0, sizeof(edit_row)); memset(edit_col, 0, sizeof(edit_col)); memset(edit_size, 0, sizeof(edit_size)); memset(last_entry_onscreen, 0, sizeof(last_entry_onscreen)); dmode = INIT_MODE; sbd.bsize = 4096; block = starting_blk = 0x10; for (i = 0; i < BLOCK_STACK_SIZE; i++) { blockstack[i].dmode = HEX_MODE; blockstack[i].block = block; for (j = 0; j < DMODES; j++) { blockstack[i].start_row[j] = 0; blockstack[i].end_row[j] = 0; blockstack[i].edit_row[j] = 0; blockstack[i].edit_col[j] = 0; blockstack[i].lines_per_row[j] = 0; } } edit_row[GFS2_MODE] = 10; /* Start off at root inode pointer in superblock */ memset(device, 0, sizeof(device)); termlines = 30; /* assume interactive mode until we find -p */ process_parameters(argc, argv, 0); if (dmode 
== INIT_MODE) dmode = HEX_MODE; fd = open(device, O_RDWR); if (fd < 0) die("can't open %s: %s\n", device, strerror(errno)); max_block = lseek(fd, 0, SEEK_END) / sbd.bsize; read_superblock(fd); max_block = lseek(fd, 0, SEEK_END) / sbd.bsize; strcpy(sbd.device_name, device); if (sbd.gfs1) edit_row[GFS2_MODE]++; else read_master_dir(); block_in_mem = -1; process_parameters(argc, argv, 1); /* get what to print from cmdline */ block = blockstack[0].block = starting_blk * (4096 / sbd.bsize); if (termlines) interactive_mode(); else { /* print all the structures requested */ i = 0; while (blockhist > 0) { block = blockstack[i + 1].block; if (!block) break; display(identify); if (!identify) { display_extended(); printf("-------------------------------------" \ "-----------------"); eol(0); } block = pop_block(); i++; } } close(fd); if (indirect) free(indirect); gfs2_rgrp_free(&sbd.rgtree); exit(EXIT_SUCCESS); } gfs2-utils/gfs2/edit/hexedit.h0000664000175000017500000001413512110647577015136 0ustar andyandy#ifndef __HEXVIEW_DOT_H__ #define __HEXVIEW_DOT_H__ #include #include #include #include #include #include "libgfs2.h" #include "copyright.cf" #ifndef TRUE #define TRUE 1 #endif #ifndef FALSE #define FALSE 0 #endif #define DMODES 3 enum dsp_mode { HEX_MODE = 0, GFS2_MODE = 1, EXTENDED_MODE = 2, INIT_MODE = 3 }; #define BLOCK_STACK_SIZE 256 #define pv(struct, member, fmt, fmt2) do { \ print_it(" "#member, fmt, fmt2, struct->member); \ } while (FALSE); #define RGLIST_DUMMY_BLOCK -2 extern struct gfs2_sb sb; extern uint64_t block; extern int blockhist; extern int edit_mode; extern int line; extern char edit_fmt[80]; extern char estring[1024]; /* edit string */ extern char efield[64]; extern uint64_t dev_offset; extern uint64_t max_block; extern struct gfs2_buffer_head *bh; extern int termlines; extern int insert; extern const char *termtype; extern int line; extern int struct_len; extern unsigned int offset; extern int edit_row[DMODES], edit_col[DMODES], print_entry_ndx; extern 
int start_row[DMODES], end_row[DMODES], lines_per_row[DMODES]; extern int edit_size[DMODES], last_entry_onscreen[DMODES]; extern char edit_fmt[80]; extern struct gfs2_sbd sbd; extern struct gfs_sb *sbd1; extern struct gfs2_inum gfs1_quota_di; /* kludge because gfs2 sb too small */ extern struct gfs2_inum gfs1_license_di; /* kludge because gfs2 sb too small */ extern struct gfs2_dinode di; extern int screen_chunk_size; /* how much of the 4K can fit on screen */ extern int gfs2_struct_type; extern uint64_t block_in_mem; extern char device[NAME_MAX]; extern int identify; extern int color_scheme; extern WINDOW *wind; extern int editing; extern uint64_t temp_blk; extern uint64_t starting_blk; extern const char *block_type_str[15]; extern int dsplines; extern int dsp_lines[DMODES]; extern int combined_display; struct gfs2_dirents { uint64_t block; struct gfs2_dirent dirent; char filename[NAME_MAX]; }; struct indirect_info { int is_dir; int height; uint64_t block; uint32_t dirents; uint16_t lf_depth; uint16_t lf_entries; uint32_t lf_dirent_format; uint64_t lf_next; struct metapath mp; struct gfs2_dirents dirent[64]; }; struct iinfo { struct indirect_info ii[512]; }; struct blkstack_info { uint64_t block; int start_row[DMODES]; int end_row[DMODES]; int lines_per_row[DMODES]; int edit_row[DMODES]; int edit_col[DMODES]; enum dsp_mode dmode; int gfs2_struct_type; struct metapath mp; }; extern struct blkstack_info blockstack[BLOCK_STACK_SIZE]; extern struct iinfo *indirect; /* more than the most indirect pointers possible for any given 4K block */ extern struct indirect_info masterdir; /* Master directory info */ extern int indirect_blocks; /* count of indirect blocks */ extern enum dsp_mode dmode; /* ------------------------------------------------------------------------ */ /* block_is_rgtree - there's no such block as the rglist. This is a */ /* special case meant to parse the rindex and follow the */ /* blocks to the real rgs. 
*/ /* ------------------------------------------------------------------------ */ static inline int block_is_rgtree(void) { if (block == RGLIST_DUMMY_BLOCK) return TRUE; return FALSE; } #define SCREEN_HEIGHT (16) #define SCREEN_WIDTH (16) /* die() used to be in libgfs2.h */ static __inline__ __attribute__((noreturn, format (printf, 1, 2))) void die(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(-1); } /* Memory macros */ #define type_alloc(ptr, type, count) \ { \ (ptr) = (type *)malloc(sizeof(type) * (count)); \ if (!(ptr)) \ die("unable to allocate memory on line %d of file %s\n", \ __LINE__, __FILE__); \ } #define printk printw /* Divide x by y. Round up if there is a remainder. */ #define DIV_RU(x, y) (((x) + (y) - 1) / (y)) #define TITLE1 "gfs2_edit - Global File System Editor (use with extreme caution)" #define TITLE2 REDHAT_COPYRIGHT " - Press H for help" #define COLOR_TITLE 1 #define COLOR_NORMAL 2 #define COLOR_INVERSE 3 #define COLOR_SPECIAL 4 #define COLOR_HIGHLIGHT 5 #define COLOR_OFFSETS 6 #define COLOR_CONTENTS 7 #define COLORS_TITLE \ do { \ if (termlines) { \ attrset(COLOR_PAIR(COLOR_TITLE)); \ attron(A_BOLD); \ } \ } while (0) #define COLORS_NORMAL \ do { \ if (termlines) { \ attrset(COLOR_PAIR(COLOR_NORMAL)); \ attron(A_BOLD); \ } \ } while (0) #define COLORS_INVERSE \ do { \ if (termlines) { \ attrset(COLOR_PAIR(COLOR_INVERSE)); \ attron(A_BOLD); \ } \ } while (0) #define COLORS_SPECIAL \ do { \ if (termlines) { \ attrset(COLOR_PAIR(COLOR_SPECIAL)); \ attron(A_BOLD); \ } \ } while (0) #define COLORS_HIGHLIGHT \ do { \ if (termlines) { \ attrset(COLOR_PAIR(COLOR_HIGHLIGHT)); \ attron(A_BOLD); \ } \ } while (0) #define COLORS_OFFSETS \ do { \ if (termlines) { \ attrset(COLOR_PAIR(COLOR_OFFSETS)); \ attron(A_BOLD); \ } \ } while (0) #define COLORS_CONTENTS \ do { \ if (termlines) { \ attrset(COLOR_PAIR(COLOR_CONTENTS)); \ attron(A_BOLD); \ } \ } while (0) extern int block_is_jindex(void); extern int 
block_is_rindex(void); extern int block_is_inum_file(void); extern int block_is_statfs_file(void); extern int block_is_quota_file(void); extern int block_is_per_node(void); extern int display_block_type(int from_restore); extern void gfs_jindex_in(struct gfs_jindex *jindex, char *buf); extern void gfs_log_header_in(struct gfs_log_header *head, struct gfs2_buffer_head *bh); extern void gfs_log_header_print(struct gfs_log_header *lh); extern void gfs_dinode_in(struct gfs_dinode *di, struct gfs2_buffer_head *bh); extern void savemeta(char *out_fn, int saveoption, int gziplevel); extern void restoremeta(const char *in_fn, const char *out_device, uint64_t printblocksonly); extern int display(int identify_only); extern uint64_t check_keywords(const char *kword); extern uint64_t masterblock(const char *fn); extern void gfs_rgrp_print(struct gfs_rgrp *rg); extern int has_indirect_blocks(void); #endif /* __HEXVIEW_DOT_H__ */ gfs2-utils/gfs2/edit/savemeta.c0000664000175000017500000007124312120360543015271 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "osi_list.h" #include "gfs2hex.h" #include "hexedit.h" #include "libgfs2.h" #define BUFSIZE (4096) #define DFT_SAVE_FILE "/tmp/gfsmeta.XXXXXX" #define MAX_JOURNALS_SAVED 256 struct saved_metablock { uint64_t blk; uint16_t siglen; /* significant data length */ char buf[BUFSIZE]; }; struct metafd { int fd; gzFile gzfd; const char *filename; int gziplevel; }; struct saved_metablock *savedata; uint64_t last_reported_block, blks_saved, total_out, pct; uint64_t journal_blocks[MAX_JOURNALS_SAVED]; uint64_t gfs1_journal_size = 0; /* in blocks */ int journals_found = 0; extern void read_superblock(void); static int block_is_a_journal(void) { int j; for (j = 0; j < journals_found; j++) if (block == journal_blocks[j]) return TRUE; return FALSE; } static int 
block_is_in_per_node(void) { int d; struct gfs2_inode *per_node_di; if (sbd.gfs1) return FALSE; per_node_di = lgfs2_inode_read(&sbd, masterblock("per_node")); if (per_node_di == NULL) { fprintf(stderr, "Failed to read per_node: %s\n", strerror(errno)); exit(1); } do_dinode_extended(&per_node_di->i_di, per_node_di->i_bh); inode_put(&per_node_di); for (d = 0; d < indirect->ii[0].dirents; d++) { if (block == indirect->ii[0].dirent[d].block) return TRUE; } return FALSE; } static int block_is_systemfile(void) { return block_is_jindex() || block_is_inum_file() || block_is_statfs_file() || block_is_quota_file() || block_is_rindex() || block_is_a_journal() || block_is_per_node() || block_is_in_per_node(); } /** * anthropomorphize - make a uint64_t number more human */ static const char *anthropomorphize(unsigned long long inhuman_value) { const char *symbols = " KMGTPE"; int i; unsigned long long val = inhuman_value, remainder = 0; static char out_val[32]; memset(out_val, 0, sizeof(out_val)); for (i = 0; i < 6 && val > 1024; i++) { remainder = val % 1024; val /= 1024; } sprintf(out_val, "%llu.%llu%c", val, remainder, symbols[i]); return out_val; } /* * get_gfs_struct_info - get block type and structure length * * @block_type - pointer to integer to hold the block type * @struct_length - pointer to integet to hold the structure length * * returns: 0 if successful * -1 if this isn't gfs metadata. 
*/ static int get_gfs_struct_info(struct gfs2_buffer_head *lbh, int *block_type, int *gstruct_len) { struct gfs2_meta_header mh; struct gfs2_inode *inode; *block_type = 0; *gstruct_len = sbd.bsize; gfs2_meta_header_in(&mh, lbh); if (mh.mh_magic != GFS2_MAGIC) return -1; *block_type = mh.mh_type; switch (mh.mh_type) { case GFS2_METATYPE_SB: /* 1 (superblock) */ *gstruct_len = sizeof(struct gfs_sb); break; case GFS2_METATYPE_RG: /* 2 (rsrc grp hdr) */ *gstruct_len = sbd.bsize; /*sizeof(struct gfs_rgrp);*/ break; case GFS2_METATYPE_RB: /* 3 (rsrc grp bitblk) */ *gstruct_len = sbd.bsize; break; case GFS2_METATYPE_DI: /* 4 (disk inode) */ if (sbd.gfs1) { inode = lgfs2_gfs_inode_get(&sbd, lbh); } else { inode = lgfs2_inode_get(&sbd, lbh); } if (inode == NULL) { perror("Error reading inode"); exit(-1); } if (S_ISDIR(inode->i_di.di_mode) || (sbd.gfs1 && inode->i_di.__pad1 == GFS_FILE_DIR)) *gstruct_len = sbd.bsize; else if (!inode->i_di.di_height && !block_is_systemfile() && !S_ISDIR(inode->i_di.di_mode)) *gstruct_len = sizeof(struct gfs2_dinode); else *gstruct_len = sbd.bsize; inode_put(&inode); break; case GFS2_METATYPE_IN: /* 5 (indir inode blklst) */ *gstruct_len = sbd.bsize; /*sizeof(struct gfs_indirect);*/ break; case GFS2_METATYPE_LF: /* 6 (leaf dinode blklst) */ *gstruct_len = sbd.bsize; /*sizeof(struct gfs_leaf);*/ break; case GFS2_METATYPE_JD: /* 7 (journal data) */ *gstruct_len = sbd.bsize; break; case GFS2_METATYPE_LH: /* 8 (log header) */ if (sbd.gfs1) *gstruct_len = 512; /* gfs copies the log header twice and compares the copy, so we need to save all 512 bytes of it. 
*/ else *gstruct_len = sizeof(struct gfs2_log_header); break; case GFS2_METATYPE_LD: /* 9 (log descriptor) */ *gstruct_len = sbd.bsize; break; case GFS2_METATYPE_EA: /* 10 (extended attr hdr) */ *gstruct_len = sbd.bsize; break; case GFS2_METATYPE_ED: /* 11 (extended attr data) */ *gstruct_len = sbd.bsize; break; default: *gstruct_len = sbd.bsize; break; } return 0; } /* Put out a warm, fuzzy message every second so the user */ /* doesn't think we hung. (This may take a long time). */ /* We only check whether to report every one percent because */ /* checking every block kills performance. We only report */ /* every second because we don't need 100 extra messages in */ /* logs made from verbose mode. */ static void warm_fuzzy_stuff(uint64_t wfsblock, int force) { static struct timeval tv; static uint32_t seconds = 0; last_reported_block = wfsblock; gettimeofday(&tv, NULL); if (!seconds) seconds = tv.tv_sec; if (force || tv.tv_sec - seconds) { static uint64_t percent; seconds = tv.tv_sec; if (sbd.fssize) { printf("\r"); percent = (wfsblock * 100) / sbd.fssize; printf("%llu inodes processed, %llu blocks saved " "(%llu%%) processed, ", (unsigned long long)wfsblock, (unsigned long long)blks_saved, (unsigned long long)percent); if (force) printf("\n"); fflush(stdout); } } } /** * Open a file and prepare it for writing by savemeta() * out_fn: the path to the file, which will be truncated if it exists * gziplevel: 0 - do not compress the file, * 1-9 - use gzip compression level 1-9 * Returns a struct metafd containing the opened file descriptor */ static struct metafd savemetaopen(char *out_fn, int gziplevel) { struct metafd mfd = {-1, NULL, NULL, gziplevel}; char gzmode[3] = "w9"; char dft_fn[] = DFT_SAVE_FILE; if (!out_fn) { out_fn = dft_fn; mfd.fd = mkstemp(out_fn); } else { mfd.fd = open(out_fn, O_RDWR | O_CREAT, 0644); } mfd.filename = out_fn; if (mfd.fd < 0) { fprintf(stderr, "Can't open %s: %s\n", out_fn, strerror(errno)); exit(1); } if (ftruncate(mfd.fd, 0)) { 
fprintf(stderr, "Can't truncate %s: %s\n", out_fn, strerror(errno)); exit(1); } if (gziplevel > 0) { gzmode[1] = '0' + gziplevel; mfd.gzfd = gzdopen(mfd.fd, gzmode); if (!mfd.gzfd) { fprintf(stderr, "gzdopen error: %s\n", strerror(errno)); exit(1); } } return mfd; } /** * Write nbyte bytes from buf to a file opened with savemetaopen() * mfd: the file descriptor opened using savemetaopen() * buf: the buffer to write data from * nbyte: the number of bytes to write * Returns the number of bytes written from buf or -1 on error */ static ssize_t savemetawrite(struct metafd *mfd, const void *buf, size_t nbyte) { ssize_t ret; int gzerr; const char *gzerrmsg; if (mfd->gziplevel == 0) { return write(mfd->fd, buf, nbyte); } ret = gzwrite(mfd->gzfd, buf, nbyte); if (ret != nbyte) { gzerrmsg = gzerror(mfd->gzfd, &gzerr); if (gzerr != Z_ERRNO) { fprintf(stderr, "Error: zlib: %s\n", gzerrmsg); } } return ret; } /** * Closes a file descriptor previously opened using savemetaopen() * mfd: the file descriptor previously opened using savemetaopen() * Returns 0 on success or -1 on error */ static int savemetaclose(struct metafd *mfd) { int gzret; if (mfd->gziplevel > 0) { gzret = gzclose(mfd->gzfd); if (gzret == Z_STREAM_ERROR) { fprintf(stderr, "gzclose: file is not valid\n"); return -1; } else if (gzret == Z_ERRNO) { return -1; } } return close(mfd->fd); } static int save_block(int fd, struct metafd *mfd, uint64_t blk) { int blktype, blklen, outsz; uint16_t trailing0; char *p; struct gfs2_buffer_head *savebh; if (gfs2_check_range(&sbd, blk) && blk != sbd.sb_addr) { fprintf(stderr, "\nWarning: bad block pointer '0x%llx' " "ignored in block (block %llu (0x%llx))", (unsigned long long)blk, (unsigned long long)block, (unsigned long long)block); return 0; } memset(savedata, 0, sizeof(struct saved_metablock)); savebh = bread(&sbd, blk); memcpy(&savedata->buf, savebh->b_data, sbd.bsize); /* If this isn't metadata and isn't a system file, we don't want it. 
Note that we're checking "block" here rather than blk. That's because we want to know if the source inode's "block" is a system inode, not the block within the inode "blk". They may or may not be the same thing. */ if (get_gfs_struct_info(savebh, &blktype, &blklen) && !block_is_systemfile()) { brelse(savebh); return 0; /* Not metadata, and not system file, so skip it */ } trailing0 = 0; p = &savedata->buf[blklen - 1]; while (*p=='\0' && trailing0 < sbd.bsize) { trailing0++; p--; } savedata->blk = cpu_to_be64(blk); if (savemetawrite(mfd, &savedata->blk, sizeof(savedata->blk)) != sizeof(savedata->blk)) { fprintf(stderr, "write error: %s from %s:%d: " "block %lld (0x%llx)\n", strerror(errno), __FUNCTION__, __LINE__, (unsigned long long)savedata->blk, (unsigned long long)savedata->blk); exit(-1); } outsz = blklen - trailing0; savedata->siglen = cpu_to_be16(outsz); if (savemetawrite(mfd, &savedata->siglen, sizeof(savedata->siglen)) != sizeof(savedata->siglen)) { fprintf(stderr, "write error: %s from %s:%d: " "block %lld (0x%llx)\n", strerror(errno), __FUNCTION__, __LINE__, (unsigned long long)savedata->blk, (unsigned long long)savedata->blk); exit(-1); } if (savemetawrite(mfd, savedata->buf, outsz) != outsz) { fprintf(stderr, "write error: %s from %s:%d: " "block %lld (0x%llx)\n", strerror(errno), __FUNCTION__, __LINE__, (unsigned long long)savedata->blk, (unsigned long long)savedata->blk); exit(-1); } total_out += sizeof(savedata->blk) + sizeof(savedata->siglen) + outsz; blks_saved++; brelse(savebh); return blktype; } /* * save_ea_block - save off an extended attribute block */ static void save_ea_block(struct metafd *mfd, struct gfs2_buffer_head *metabh) { int i, e, ea_len = sbd.bsize; struct gfs2_ea_header ea; for (e = sizeof(struct gfs2_meta_header); e < sbd.bsize; e += ea_len) { uint64_t blk, *b; int charoff; gfs2_ea_header_in(&ea, metabh->b_data + e); for (i = 0; i < ea.ea_num_ptrs; i++) { charoff = e + ea.ea_name_len + sizeof(struct gfs2_ea_header) + 
sizeof(uint64_t) - 1; charoff /= sizeof(uint64_t); b = (uint64_t *)(metabh->b_data); b += charoff + i; blk = be64_to_cpu(*b); save_block(sbd.device_fd, mfd, blk); } if (!ea.ea_rec_len) break; ea_len = ea.ea_rec_len; } } /* * save_indirect_blocks - save all indirect blocks for the given buffer */ static void save_indirect_blocks(struct metafd *mfd, osi_list_t *cur_list, struct gfs2_buffer_head *mybh, int height, int hgt) { uint64_t old_block = 0, indir_block; uint64_t *ptr; int head_size, blktype; struct gfs2_buffer_head *nbh; head_size = (hgt > 1 ? sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode)); for (ptr = (uint64_t *)(mybh->b_data + head_size); (char *)ptr < (mybh->b_data + sbd.bsize); ptr++) { if (!*ptr) continue; indir_block = be64_to_cpu(*ptr); if (indir_block == old_block) continue; old_block = indir_block; blktype = save_block(sbd.device_fd, mfd, indir_block); if (blktype == GFS2_METATYPE_EA) { nbh = bread(&sbd, indir_block); save_ea_block(mfd, nbh); brelse(nbh); } if (height != hgt && /* If not at max height and */ (!gfs2_check_range(&sbd, indir_block))) { nbh = bread(&sbd, indir_block); osi_list_add_prev(&nbh->b_altlist, cur_list); /* The buffer_head needs to be queued ahead, so don't release it! brelse(nbh);*/ } } /* for all data on the indirect block */ } /* * save_inode_data - save off important data associated with an inode * * mfd - destination file descriptor * block - block number of the inode to save the data for * * For user files, we don't want anything except all the indirect block * pointers that reside on blocks on all but the highest height. * * For system files like statfs and inum, we want everything because they * may contain important clues and no user data. * * For file system journals, the "data" is a mixture of metadata and * journaled data. We want all the metadata and none of the user data. 
*/ static void save_inode_data(struct metafd *mfd) { uint32_t height; struct gfs2_inode *inode; osi_list_t metalist[GFS2_MAX_META_HEIGHT]; osi_list_t *prev_list, *cur_list, *tmp; struct gfs2_buffer_head *metabh, *mybh; int i; for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) osi_list_init(&metalist[i]); metabh = bread(&sbd, block); if (sbd.gfs1) { inode = lgfs2_gfs_inode_get(&sbd, metabh); } else { inode = lgfs2_inode_get(&sbd, metabh); } if (inode == NULL) { perror("Failed to read inode"); exit(-1); } height = inode->i_di.di_height; /* If this is a user inode, we don't follow to the file height. We stop one level less. That way we save off the indirect pointer blocks but not the actual file contents. The exception is directories, where the height represents the level at which the hash table exists, and we have to save the directory data. */ if (inode->i_di.di_flags & GFS2_DIF_EXHASH && (S_ISDIR(inode->i_di.di_mode) || (sbd.gfs1 && inode->i_di.__pad1 == GFS_FILE_DIR))) height++; else if (height && !(inode->i_di.di_flags & GFS2_DIF_SYSTEM) && !block_is_systemfile() && !S_ISDIR(inode->i_di.di_mode)) height--; osi_list_add(&metabh->b_altlist, &metalist[0]); for (i = 1; i <= height; i++){ prev_list = &metalist[i - 1]; cur_list = &metalist[i]; for (tmp = prev_list->next; tmp != prev_list; tmp = tmp->next){ mybh = osi_list_entry(tmp, struct gfs2_buffer_head, b_altlist); warm_fuzzy_stuff(block, FALSE); save_indirect_blocks(mfd, cur_list, mybh, height, i); } /* for blocks at that height */ } /* for height */ /* free metalists */ for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) { cur_list = &metalist[i]; while (!osi_list_empty(cur_list)) { mybh = osi_list_entry(cur_list->next, struct gfs2_buffer_head, b_altlist); if (mybh == inode->i_bh) osi_list_del(&mybh->b_altlist); else brelse(mybh); } } /* Process directory exhash inodes */ if (S_ISDIR(inode->i_di.di_mode) && inode->i_di.di_flags & GFS2_DIF_EXHASH) { uint64_t leaf_no, old_leaf = -1; int li; for (li = 0; li < (1 << 
inode->i_di.di_depth); li++) { gfs2_get_leaf_nr(inode, li, &leaf_no); if (leaf_no == old_leaf || gfs2_check_range(&sbd, leaf_no) != 0) continue; old_leaf = leaf_no; mybh = bread(&sbd, leaf_no); warm_fuzzy_stuff(block, FALSE); if (gfs2_check_meta(mybh, GFS2_METATYPE_LF) == 0) save_block(sbd.device_fd, mfd, leaf_no); brelse(mybh); } } if (inode->i_di.di_eattr) { /* if this inode has extended attributes */ struct gfs2_meta_header mh; struct gfs2_buffer_head *lbh; lbh = bread(&sbd, inode->i_di.di_eattr); save_block(sbd.device_fd, mfd, inode->i_di.di_eattr); gfs2_meta_header_in(&mh, lbh); if (mh.mh_magic == GFS2_MAGIC && mh.mh_type == GFS2_METATYPE_EA) save_ea_block(mfd, lbh); else if (mh.mh_magic == GFS2_MAGIC && mh.mh_type == GFS2_METATYPE_IN) save_indirect_blocks(mfd, cur_list, lbh, 2, 2); else { if (mh.mh_magic == GFS2_MAGIC) /* if it's metadata */ save_block(sbd.device_fd, mfd, inode->i_di.di_eattr); fprintf(stderr, "\nWarning: corrupt extended " "attribute at block %llu (0x%llx) " "detected in inode %lld (0x%llx).\n", (unsigned long long)inode->i_di.di_eattr, (unsigned long long)inode->i_di.di_eattr, (unsigned long long)block, (unsigned long long)block); } brelse(lbh); } inode_put(&inode); brelse(metabh); } static void get_journal_inode_blocks(void) { int journal; journals_found = 0; memset(journal_blocks, 0, sizeof(journal_blocks)); /* Save off all the journals--but only the metadata. * This is confusing so I'll explain. The journals contain important * metadata. However, in gfs2 the journals are regular files within * the system directory. Since they're regular files, the blocks * within the journals are considered data, not metadata. Therefore, * they won't have been saved by the code above. We want to dump * these blocks, but we have to be careful. We only care about the * journal blocks that look like metadata, and we need to not save * journaled user data that may exist there as well. 
*/ for (journal = 0; ; journal++) { /* while journals exist */ uint64_t jblock; int amt; struct gfs2_inode *j_inode = NULL; if (sbd.gfs1) { struct gfs_jindex ji; char jbuf[sizeof(struct gfs_jindex)]; j_inode = lgfs2_gfs_inode_read(&sbd, sbd1->sb_jindex_di.no_addr); if (j_inode == NULL) { fprintf(stderr, "Error reading journal inode: %s\n", strerror(errno)); return; } amt = gfs2_readi(j_inode, (void *)&jbuf, journal * sizeof(struct gfs_jindex), sizeof(struct gfs_jindex)); inode_put(&j_inode); if (!amt) break; gfs_jindex_in(&ji, jbuf); jblock = ji.ji_addr; gfs1_journal_size = ji.ji_nsegment * 16; } else { if (journal > indirect->ii[0].dirents - 3) break; jblock = indirect->ii[0].dirent[journal + 2].block; } journal_blocks[journals_found++] = jblock; } } static void save_allocated(struct rgrp_tree *rgd, struct metafd *mfd) { int blktype; unsigned i, j, m; uint64_t *ibuf = malloc(sbd.bsize * GFS2_NBBY * sizeof(uint64_t)); for (i = 0; i < rgd->ri.ri_length; i++) { m = lgfs2_bm_scan(rgd, i, ibuf, GFS2_BLKST_DINODE); for (j = 0; j < m; j++) { block = ibuf[j]; warm_fuzzy_stuff(block, FALSE); blktype = save_block(sbd.device_fd, mfd, block); if (blktype == GFS2_METATYPE_DI) save_inode_data(mfd); } if (!sbd.gfs1) continue; /* For gfs1, Save off the free/unlinked meta blocks too. * If we don't, we may run into metadata allocation issues. 
*/ m = lgfs2_bm_scan(rgd, i, ibuf, GFS2_BLKST_UNLINKED); for (j = 0; j < m; j++) { blktype = save_block(sbd.device_fd, mfd, block); } } free(ibuf); } void savemeta(char *out_fn, int saveoption, int gziplevel) { int rgcount; uint64_t jindex_block; struct gfs2_buffer_head *lbh; struct metafd mfd; int sane; struct osi_node *n, *next = NULL; sbd.md.journals = 1; mfd = savemetaopen(out_fn, gziplevel); savedata = malloc(sizeof(struct saved_metablock)); if (!savedata) die("Can't allocate memory for the operation.\n"); lseek(sbd.device_fd, 0, SEEK_SET); blks_saved = total_out = last_reported_block = 0; if (!sbd.gfs1) sbd.bsize = BUFSIZE; if (lgfs2_get_dev_info(sbd.device_fd, &sbd.dinfo)) { perror(sbd.device_name); exit(-1); } fix_device_geometry(&sbd); sbd.rgtree.osi_node = NULL; if (!sbd.gfs1) sbd.sd_sb.sb_bsize = GFS2_DEFAULT_BSIZE; if (compute_constants(&sbd)) { fprintf(stderr, "Bad constants (1)\n"); exit(-1); } if (read_sb(&sbd) < 0) { fprintf(stderr, "Unable to read superblock.\n"); exit(-1); } if (sbd.gfs1) sbd.bsize = sbd.sd_sb.sb_bsize; printf("There are %llu blocks of %u bytes in the destination " "device.\n", (unsigned long long)sbd.fssize, sbd.bsize); if (sbd.gfs1) { sbd.md.riinode = lgfs2_inode_read(&sbd, sbd1->sb_rindex_di.no_addr); if (sbd.md.riinode == NULL) { fprintf(stderr, "Unable to read rindex: %s.\n", strerror(errno)); exit(-1); } jindex_block = sbd1->sb_jindex_di.no_addr; } else { sbd.master_dir = lgfs2_inode_read(&sbd, sbd.sd_sb.sb_master_dir.no_addr); if (sbd.master_dir == NULL) { fprintf(stderr, "Unable to read master: %s.\n", strerror(errno)); exit(-1); } gfs2_lookupi(sbd.master_dir, "rindex", 6, &sbd.md.riinode); jindex_block = masterblock("jindex"); } lbh = bread(&sbd, jindex_block); gfs2_dinode_in(&di, lbh); if (!sbd.gfs1) do_dinode_extended(&di, lbh); brelse(lbh); printf("Reading resource groups..."); fflush(stdout); if (sbd.gfs1) gfs1_ri_update(&sbd, 0, &rgcount, 0); else ri_update(&sbd, 0, &rgcount, &sane); printf("Done. 
File system size: %s\n\n", anthropomorphize(sbd.fssize)); fflush(stdout); get_journal_inode_blocks(); /* Save off the superblock */ save_block(sbd.device_fd, &mfd, 0x10 * (4096 / sbd.bsize)); /* If this is gfs1, save off the rindex because it's not part of the file system as it is in gfs2. */ if (sbd.gfs1) { int j; block = sbd1->sb_rindex_di.no_addr; save_block(sbd.device_fd, &mfd, block); save_inode_data(&mfd); /* In GFS1, journals aren't part of the RG space */ for (j = 0; j < journals_found; j++) { log_debug("Saving journal #%d\n", j + 1); for (block = journal_blocks[j]; block < journal_blocks[j] + gfs1_journal_size; block++) save_block(sbd.device_fd, &mfd, block); } } /* Walk through the resource groups saving everything within */ for (n = osi_first(&sbd.rgtree); n; n = next) { struct rgrp_tree *rgd; next = osi_next(n); rgd = (struct rgrp_tree *)n; if (gfs2_rgrp_read(&sbd, rgd)) continue; log_debug("RG at %lld (0x%llx) is %u long\n", (unsigned long long)rgd->ri.ri_addr, (unsigned long long)rgd->ri.ri_addr, rgd->ri.ri_length); /* Save off the rg and bitmaps */ for (block = rgd->ri.ri_addr; block < rgd->ri.ri_data0; block++) { warm_fuzzy_stuff(block, FALSE); save_block(sbd.device_fd, &mfd, block); } /* Save off the other metadata: inodes, etc. if mode is not 'savergs' */ if (saveoption != 2) { save_allocated(rgd, &mfd); } gfs2_rgrp_relse(rgd); } /* Clean up */ /* There may be a gap between end of file system and end of device */ /* so we tell the user that we've processed everything. 
*/ block = sbd.fssize; warm_fuzzy_stuff(block, TRUE); printf("\nMetadata saved to file %s ", mfd.filename); if (mfd.gziplevel) { printf("(gzipped, level %d).\n", mfd.gziplevel); } else { printf("(uncompressed).\n"); } free(savedata); savemetaclose(&mfd); close(sbd.device_fd); free(indirect); gfs2_rgrp_free(&sbd.rgtree); exit(0); } static int restore_data(int fd, gzFile gzin_fd, int printblocksonly, int find_highblk) { size_t rs; uint64_t buf64, writes = 0, highest_valid_block = 0; uint16_t buf16; int first = 1, pos, gzerr; char rdbuf[256]; char gfs_superblock_id[8] = {0x01, 0x16, 0x19, 0x70, 0x00, 0x00, 0x00, 0x01}; if (!printblocksonly) lseek(fd, 0, SEEK_SET); gzseek(gzin_fd, 0, SEEK_SET); rs = gzread(gzin_fd, rdbuf, sizeof(rdbuf)); if (rs != sizeof(rdbuf)) { fprintf(stderr, "Error: File is too small.\n"); return -1; } for (pos = 0; pos < sizeof(rdbuf) - sizeof(uint64_t) - sizeof(uint16_t); pos++) { if (!memcmp(&rdbuf[pos + sizeof(uint64_t) + sizeof(uint16_t)], gfs_superblock_id, sizeof(gfs_superblock_id))) { break; } } if (pos == sizeof(rdbuf) - sizeof(uint64_t) - sizeof(uint16_t)) pos = 0; if (gzseek(gzin_fd, pos, SEEK_SET) != pos) { fprintf(stderr, "bad seek: %s from %s:%d: " "offset %lld (0x%llx)\n", strerror(errno), __FUNCTION__, __LINE__, (unsigned long long)pos, (unsigned long long)pos); exit(-1); } blks_saved = total_out = 0; while (TRUE) { struct gfs2_buffer_head dummy_bh; memset(savedata, 0, sizeof(struct saved_metablock)); rs = gzread(gzin_fd, &buf64, sizeof(uint64_t)); if (!rs) break; if (rs != sizeof(uint64_t)) { fprintf(stderr, "Error reading from file.\n"); return -1; } total_out += sbd.bsize; savedata->blk = be64_to_cpu(buf64); if (!printblocksonly && sbd.fssize && savedata->blk >= sbd.fssize) { fprintf(stderr, "Error: File system is too small to " "restore this metadata.\n"); fprintf(stderr, "File system is %llu blocks, ", (unsigned long long)sbd.fssize); fprintf(stderr, "Restore block = %llu\n", (unsigned long long)savedata->blk); return -1; } if 
(gzread(gzin_fd, &buf16, sizeof(uint16_t)) != sizeof(uint16_t)) { fprintf(stderr, "read error: %s from %s:%d: " "block %lld (0x%llx)\n", gzerror(gzin_fd, &gzerr), __FUNCTION__, __LINE__, (unsigned long long)savedata->blk, (unsigned long long)savedata->blk); exit(-1); } savedata->siglen = be16_to_cpu(buf16); if (savedata->siglen > sizeof(savedata->buf)) { fprintf(stderr, "\nBad record length: %d for block #%llu" " (0x%llx).\n", savedata->siglen, (unsigned long long)savedata->blk, (unsigned long long)savedata->blk); return -1; } if (savedata->siglen && gzread(gzin_fd, savedata->buf, savedata->siglen) != savedata->siglen) { fprintf(stderr, "read error: %s from %s:%d: " "block %lld (0x%llx)\n", gzerror(gzin_fd, &gzerr), __FUNCTION__, __LINE__, (unsigned long long)savedata->blk, (unsigned long long)savedata->blk); exit(-1); } if (first) { struct gfs2_sb bufsb; int ret; dummy_bh.b_data = (char *)&bufsb; memcpy(&bufsb, savedata->buf, sizeof(bufsb)); gfs2_sb_in(&sbd.sd_sb, &dummy_bh); sbd1 = (struct gfs_sb *)&sbd.sd_sb; ret = check_sb(&sbd.sd_sb); if (ret < 0) { fprintf(stderr,"Error: Invalid superblock data.\n"); return -1; } if (ret == 1) sbd.gfs1 = TRUE; sbd.bsize = sbd.sd_sb.sb_bsize; if (find_highblk) ; else if (!printblocksonly) { sbd.fssize = lseek(fd, 0, SEEK_END) / sbd.bsize; printf("There are %llu blocks of %u bytes in " "the destination device.\n\n", (unsigned long long)sbd.fssize, sbd.bsize); } else { printf("This is %s metadata\n", sbd.gfs1 ? 
"gfs (not gfs2)" : "gfs2"); } first = 0; } bh = &dummy_bh; bh->b_data = savedata->buf; if (savedata->blk > highest_valid_block) highest_valid_block = savedata->blk; if (find_highblk) ; else if (printblocksonly) { block = savedata->blk; if (printblocksonly > 1 && printblocksonly == block) { block_in_mem = block; display(0); return 0; } else if (printblocksonly == 1) { print_gfs2("%d (l=0x%x): ", blks_saved, savedata->siglen); display_block_type(TRUE); } } else { warm_fuzzy_stuff(savedata->blk, FALSE); if (savedata->blk >= sbd.fssize) { printf("\nOut of space on the destination " "device; quitting.\n"); break; } if (lseek(fd, savedata->blk * sbd.bsize, SEEK_SET) != savedata->blk * sbd.bsize) { fprintf(stderr, "bad seek: %s from %s:" "%d: block %lld (0x%llx)\n", strerror(errno), __FUNCTION__, __LINE__, (unsigned long long)savedata->blk, (unsigned long long)savedata->blk); exit(-1); } if (write(fd, savedata->buf, sbd.bsize) != sbd.bsize) { fprintf(stderr, "write error: %s from " "%s:%d: block %lld (0x%llx)\n", strerror(errno), __FUNCTION__, __LINE__, (unsigned long long)savedata->blk, (unsigned long long)savedata->blk); exit(-1); } writes++; if (writes % 1000 == 0) fsync(fd); } blks_saved++; } if (!printblocksonly && !find_highblk) warm_fuzzy_stuff(sbd.fssize, TRUE); if (find_highblk) { printf("File system size: %lld (0x%llx) blocks, aka %sB\n", (unsigned long long)highest_valid_block, (unsigned long long)highest_valid_block, anthropomorphize(highest_valid_block * sbd.bsize)); sbd.fssize = highest_valid_block; } return 0; } static void complain(const char *complaint) { fprintf(stderr, "%s\n", complaint); die("Format is: \ngfs2_edit restoremeta " "\n"); } void restoremeta(const char *in_fn, const char *out_device, uint64_t printblocksonly) { int error; gzFile gzfd; termlines = 0; if (!in_fn) complain("No source file specified."); if (!printblocksonly && !out_device) complain("No destination file system specified."); gzfd = gzopen(in_fn, "rb"); if (!gzfd) die("Can't open 
source file %s: %s\n", in_fn, strerror(errno)); if (!printblocksonly) { sbd.device_fd = open(out_device, O_RDWR); if (sbd.device_fd < 0) die("Can't open destination file system %s: %s\n", out_device, strerror(errno)); } else if (out_device) /* for printsavedmeta, the out_device is an optional block no */ printblocksonly = check_keywords(out_device); savedata = malloc(sizeof(struct saved_metablock)); if (!savedata) die("Can't allocate memory for the restore operation.\n"); blks_saved = 0; restore_data(sbd.device_fd, gzfd, printblocksonly, 1); error = restore_data(sbd.device_fd, gzfd, printblocksonly, 0); printf("File %s %s %s.\n", in_fn, (printblocksonly ? "print" : "restore"), (error ? "error" : "successful")); free(savedata); gzclose(gzfd); if (!printblocksonly) close(sbd.device_fd); free(indirect); exit(error); } gfs2-utils/gfs2/edit/target.mk0000664000175000017500000000013112110647577015141 0ustar andyandy $(eval $(call make-trans-binary,/usr/sbin/gfs2_edit,gfs2/libgfs2/libgfs2.a -lncurses)) gfs2-utils/gfs2/fsck/FEATURES0000664000175000017500000000213312110647577014470 0ustar andyandyThis is a completely rewritten filesystem checker for GFS. Performance characteristics are significantly improved. The design follows the 5-pass fsck design found in "Fsck - The UNIX File System Check Program" by McKusick & Kowalkski (1994) - http://citeseer.ist.psu.edu/mckusick94fsck.html Line item list of supported features: 1. Detects and replaces missing/bad root inode 2. Detects and relinks unlinked inodes to l+f o If a file is zero length, it is not relinked to l+f - unless it has an extended attribute attached to it. 3. Detects duplicate blocks and removes inodes containing them 4. Detects bad blocks (block number out of range) and removes inodes containing them - Currently EAs that have blocks are removed but the inode containing them is left. 5. Detects bad metadata headers and clears the structure 6. Fixes bad resource group bitmaps 7. 
Fixes incorrect resource group counts 8. Creates l+f directory if missing 9. Detects and removes duplicate '.' and '..' entries 10. Creates '.' if missing 11. Beginning of support for internationalization 12. Checks extended attributes gfs2-utils/gfs2/fsck/Makefile.am0000664000175000017500000000174712110647577015375 0ustar andyandyMAINTAINERCLEANFILES = Makefile.in # When an exec_prefix setting would have us install into /usr/sbin, # use /sbin instead. # Accept an existing sbindir value of /usr/sbin (probably for older automake), # or an empty value, for automake-1.11 and newer. sbindir := $(shell rpl=0; test '$(exec_prefix):$(sbindir)' = /usr:/usr/sbin \ || test '$(exec_prefix):$(sbindir)' = /usr: && rpl=1; \ test $$rpl = 1 && echo /sbin || echo '$(exec_prefix)/sbin') sbin_PROGRAMS = fsck.gfs2 noinst_HEADERS = eattr.h fsck.h fs_recovery.h \ inode_hash.h link.h lost_n_found.h metawalk.h util.h fsck_gfs2_SOURCES = eattr.c fs_recovery.c initialize.c \ inode_hash.c link.c lost_n_found.c main.c metawalk.c \ pass1b.c pass1.c pass1c.c pass2.c pass3.c pass4.c \ pass5.c rgrepair.c util.c fsck_gfs2_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -DHELPER_PROGRAM \ -I$(top_srcdir)/gfs2/include \ -I$(top_srcdir)/gfs2/libgfs2 fsck_gfs2_LDADD = $(top_builddir)/gfs2/libgfs2/libgfs2.la gfs2-utils/gfs2/fsck/eattr.c0000664000175000017500000000256312110647577014621 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #define _(String) gettext(String) #include "libgfs2.h" #include "fsck.h" #include "metawalk.h" #include "eattr.h" int clear_eattr_entry (struct gfs2_inode *ip, struct gfs2_buffer_head *leaf_bh, struct gfs2_ea_header *ea_hdr, struct gfs2_ea_header *ea_hdr_prev, void *private) { struct gfs2_sbd *sdp = ip->i_sbd; if (!ea_hdr->ea_name_len){ /* Skip this entry for now */ return 1; } if (!GFS2_EATYPE_VALID(ea_hdr->ea_type) && ((ea_hdr_prev) || (!ea_hdr_prev && ea_hdr->ea_type))){ /* Skip invalid entry */ return 1; } if (ea_hdr->ea_num_ptrs){ uint32_t avail_size; int 
max_ptrs; avail_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); max_ptrs = (be32_to_cpu(ea_hdr->ea_data_len)+avail_size-1)/avail_size; if (max_ptrs > ea_hdr->ea_num_ptrs) { return 1; } else { log_debug( _(" Pointers Required: %d\n" " Pointers Reported: %d\n"), max_ptrs, ea_hdr->ea_num_ptrs); } } return 0; } int clear_eattr_extentry(struct gfs2_inode *ip, uint64_t *ea_data_ptr, struct gfs2_buffer_head *leaf_bh, struct gfs2_ea_header *ea_hdr, struct gfs2_ea_header *ea_hdr_prev, void *private) { uint64_t block = be64_to_cpu(*ea_data_ptr); return delete_eattr_leaf(ip, block, 0, &leaf_bh, private); } gfs2-utils/gfs2/fsck/eattr.h0000664000175000017500000000131312110647577014616 0ustar andyandy#ifndef _EATTR_H #define _EATTR_H int clear_eattr_indir(struct gfs2_inode *ip, uint64_t block, uint64_t parent, struct gfs2_buffer_head **bh, void *private); int clear_eattr_leaf(struct gfs2_inode *ip, uint64_t block, uint64_t parent, struct gfs2_buffer_head **bh, void *private); int clear_eattr_entry (struct gfs2_inode *ip, struct gfs2_buffer_head *leaf_bh, struct gfs2_ea_header *ea_hdr, struct gfs2_ea_header *ea_hdr_prev, void *private); int clear_eattr_extentry(struct gfs2_inode *ip, uint64_t *ea_data_ptr, struct gfs2_buffer_head *leaf_bh, struct gfs2_ea_header *ea_hdr, struct gfs2_ea_header *ea_hdr_prev, void *private); #endif /* _EATTR_H */ gfs2-utils/gfs2/fsck/fs_recovery.c0000664000175000017500000004363212171770775016036 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include #define _(String) gettext(String) #include "fsck.h" #include "fs_recovery.h" #include "libgfs2.h" #include "util.h" #define JOURNAL_NAME_SIZE 16 unsigned int sd_found_jblocks = 0, sd_replayed_jblocks = 0; unsigned int sd_found_metablocks = 0, sd_replayed_metablocks = 0; unsigned int sd_found_revokes = 0; osi_list_t sd_revoke_list; unsigned int sd_replay_tail; struct gfs2_revoke_replay { osi_list_t rr_list; uint64_t rr_blkno; unsigned int 
rr_where; }; int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where) { osi_list_t *tmp, *head = &sd_revoke_list; struct gfs2_revoke_replay *rr; int found = 0; osi_list_foreach(tmp, head) { rr = osi_list_entry(tmp, struct gfs2_revoke_replay, rr_list); if (rr->rr_blkno == blkno) { found = 1; break; } } if (found) { rr->rr_where = where; return 0; } rr = malloc(sizeof(struct gfs2_revoke_replay)); if (!rr) return -ENOMEM; rr->rr_blkno = blkno; rr->rr_where = where; osi_list_add(&rr->rr_list, head); return 1; } int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where) { osi_list_t *tmp; struct gfs2_revoke_replay *rr; int wrap, a, b; int found = 0; osi_list_foreach(tmp, &sd_revoke_list) { rr = osi_list_entry(tmp, struct gfs2_revoke_replay, rr_list); if (rr->rr_blkno == blkno) { found = 1; break; } } if (!found) return 0; wrap = (rr->rr_where < sd_replay_tail); a = (sd_replay_tail < where); b = (where < rr->rr_where); return (wrap) ? (a || b) : (a && b); } void gfs2_revoke_clean(struct gfs2_sbd *sdp) { osi_list_t *head = &sd_revoke_list; struct gfs2_revoke_replay *rr; while (!osi_list_empty(head)) { rr = osi_list_entry(head->next, struct gfs2_revoke_replay, rr_list); osi_list_del(&rr->rr_list); free(rr); } } static int buf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start, struct gfs2_log_descriptor *ld, __be64 *ptr, int pass) { struct gfs2_sbd *sdp = ip->i_sbd; unsigned int blks = be32_to_cpu(ld->ld_data1); struct gfs2_buffer_head *bh_log, *bh_ip; uint64_t blkno; int error = 0; if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA) return 0; gfs2_replay_incr_blk(ip, &start); for (; blks; gfs2_replay_incr_blk(ip, &start), blks--) { uint32_t check_magic; sd_found_metablocks++; blkno = be64_to_cpu(*ptr); ptr++; if (gfs2_revoke_check(sdp, blkno, start)) continue; error = gfs2_replay_read_block(ip, start, &bh_log); if (error) return error; log_info( _("Journal replay writing metadata block #" "%lld (0x%llx) 
for journal+0x%x\n"), (unsigned long long)blkno, (unsigned long long)blkno, start); bh_ip = bget(sdp, blkno); if (!bh_ip) { log_err(_("Out of memory when replaying journals.\n")); return FSCK_ERROR; } memcpy(bh_ip->b_data, bh_log->b_data, sdp->bsize); check_magic = ((struct gfs2_meta_header *) (bh_ip->b_data))->mh_magic; check_magic = be32_to_cpu(check_magic); if (check_magic != GFS2_MAGIC) error = -EIO; else bmodified(bh_ip); brelse(bh_log); brelse(bh_ip); if (error) break; sd_replayed_metablocks++; } return error; } static int revoke_lo_scan_elements(struct gfs2_inode *ip, unsigned int start, struct gfs2_log_descriptor *ld, __be64 *ptr, int pass) { struct gfs2_sbd *sdp = ip->i_sbd; unsigned int blks = be32_to_cpu(ld->ld_length); unsigned int revokes = be32_to_cpu(ld->ld_data1); struct gfs2_buffer_head *bh; unsigned int offset; uint64_t blkno; int first = 1; int error; if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE) return 0; offset = sizeof(struct gfs2_log_descriptor); for (; blks; gfs2_replay_incr_blk(ip, &start), blks--) { error = gfs2_replay_read_block(ip, start, &bh); if (error) return error; if (!first) { if (gfs2_check_meta(bh, GFS2_METATYPE_LB)) continue; } while (offset + sizeof(uint64_t) <= sdp->sd_sb.sb_bsize) { blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset)); log_info( _("Journal replay processing revoke for " "block #%lld (0x%llx) for journal+0x%x\n"), (unsigned long long)blkno, (unsigned long long)blkno, start); error = gfs2_revoke_add(sdp, blkno, start); if (error < 0) return error; else if (error) sd_found_revokes++; if (!--revokes) break; offset += sizeof(uint64_t); } bmodified(bh); brelse(bh); offset = sizeof(struct gfs2_meta_header); first = 0; } return 0; } static int databuf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start, struct gfs2_log_descriptor *ld, __be64 *ptr, int pass) { struct gfs2_sbd *sdp = ip->i_sbd; unsigned int blks = be32_to_cpu(ld->ld_data1); struct gfs2_buffer_head *bh_log, *bh_ip; uint64_t 
blkno; uint64_t esc; int error = 0; if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA) return 0; gfs2_replay_incr_blk(ip, &start); for (; blks; gfs2_replay_incr_blk(ip, &start), blks--) { blkno = be64_to_cpu(*ptr); ptr++; esc = be64_to_cpu(*ptr); ptr++; sd_found_jblocks++; if (gfs2_revoke_check(sdp, blkno, start)) continue; error = gfs2_replay_read_block(ip, start, &bh_log); if (error) return error; log_info( _("Journal replay writing data block #%lld (0x%llx)" " for journal+0x%x\n"), (unsigned long long)blkno, (unsigned long long)blkno, start); bh_ip = bget(sdp, blkno); if (!bh_ip) { log_err(_("Out of memory when replaying journals.\n")); return FSCK_ERROR; } memcpy(bh_ip->b_data, bh_log->b_data, sdp->bsize); /* Unescape */ if (esc) { __be32 *eptr = (__be32 *)bh_ip->b_data; *eptr = cpu_to_be32(GFS2_MAGIC); } brelse(bh_log); bmodified(bh_ip); brelse(bh_ip); sd_replayed_jblocks++; } return error; } /** * foreach_descriptor - go through the active part of the log * @ip: the journal incore inode * @start: the first log header in the active region * @end: the last log header (don't process the contents of this entry)) * * Call a given function once for every log descriptor in the active * portion of the log. 
* * Returns: errno */ static int foreach_descriptor(struct gfs2_inode *ip, unsigned int start, unsigned int end, int pass) { struct gfs2_buffer_head *bh; struct gfs2_log_descriptor *ld; int error = 0; uint32_t length; __be64 *ptr; unsigned int offset = sizeof(struct gfs2_log_descriptor); offset += sizeof(__be64) - 1; offset &= ~(sizeof(__be64) - 1); while (start != end) { uint32_t check_magic; error = gfs2_replay_read_block(ip, start, &bh); if (error) return error; check_magic = ((struct gfs2_meta_header *) (bh->b_data))->mh_magic; check_magic = be32_to_cpu(check_magic); if (check_magic != GFS2_MAGIC) { bmodified(bh); brelse(bh); return -EIO; } ld = (struct gfs2_log_descriptor *)bh->b_data; length = be32_to_cpu(ld->ld_length); if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) { struct gfs2_log_header lh; error = get_log_header(ip, start, &lh); if (!error) { gfs2_replay_incr_blk(ip, &start); bmodified(bh); brelse(bh); continue; } if (error == 1) error = -EIO; bmodified(bh); brelse(bh); return error; } else if (gfs2_check_meta(bh, GFS2_METATYPE_LD)) { bmodified(bh); brelse(bh); return -EIO; } ptr = (__be64 *)(bh->b_data + offset); error = databuf_lo_scan_elements(ip, start, ld, ptr, pass); if (error) { bmodified(bh); brelse(bh); return error; } error = buf_lo_scan_elements(ip, start, ld, ptr, pass); if (error) { bmodified(bh); brelse(bh); return error; } error = revoke_lo_scan_elements(ip, start, ld, ptr, pass); if (error) { bmodified(bh); brelse(bh); return error; } while (length--) gfs2_replay_incr_blk(ip, &start); bmodified(bh); brelse(bh); } return 0; } /** * fix_journal_seq_no - Fix log header sequencing problems * @ip: the journal incore inode */ static int fix_journal_seq_no(struct gfs2_inode *ip) { int error = 0, wrapped = 0; uint32_t jd_blocks = ip->i_di.di_size / ip->i_sbd->sd_sb.sb_bsize; uint32_t blk; struct gfs2_log_header lh; uint64_t highest_seq = 0, lowest_seq = 0, prev_seq = 0; int new = 0; uint64_t dblock; uint32_t extlen; struct 
gfs2_buffer_head *bh; memset(&lh, 0, sizeof(lh)); for (blk = 0; blk < jd_blocks; blk++) { error = get_log_header(ip, blk, &lh); if (error == 1) /* if not a log header */ continue; /* just journal data--ignore it */ if (!lowest_seq || lh.lh_sequence < lowest_seq) lowest_seq = lh.lh_sequence; if (!highest_seq || lh.lh_sequence > highest_seq) highest_seq = lh.lh_sequence; if (lh.lh_sequence > prev_seq) { prev_seq = lh.lh_sequence; continue; } /* The sequence number is not higher than the previous one, so it's either wrap-around or a sequencing problem. */ if (!wrapped && lh.lh_sequence == lowest_seq) { wrapped = 1; prev_seq = lh.lh_sequence; continue; } log_err( _("Journal block %u (0x%x): sequence no. 0x%llx " "out of order.\n"), blk, blk, lh.lh_sequence); log_info( _("Low: 0x%llx, High: 0x%llx, Prev: 0x%llx\n"), (unsigned long long)lowest_seq, (unsigned long long)highest_seq, (unsigned long long)prev_seq); highest_seq++; lh.lh_sequence = highest_seq; prev_seq = lh.lh_sequence; log_warn( _("Renumbering it as 0x%llx\n"), lh.lh_sequence); block_map(ip, blk, &new, &dblock, &extlen, FALSE); bh = bread(ip->i_sbd, dblock); gfs2_log_header_out(&lh, bh); brelse(bh); } return 0; } /** * preen_is_safe - Can we safely preen the file system? * * If a preen option was specified (-a or -p) we're likely to have been * called from rc.sysinit. We need to determine whether this is shared * storage or not. If it's local storage (locking protocol==lock_nolock) * it's safe to preen the file system. If it's lock_dlm, it's likely * mounted by other nodes in the cluster, which is dangerous and therefore, * we should warn the user to run fsck.gfs2 manually when it's safe. */ int preen_is_safe(struct gfs2_sbd *sdp, int preen, int force_check) { if (!preen) /* If preen was not specified */ return 1; /* not called by rc.sysinit--we're okay to preen */ if (force_check) /* If check was forced by the user? 
*/ return 1; /* user's responsibility--we're okay to preen */ if (!memcmp(sdp->sd_sb.sb_lockproto + 5, "nolock", 6)) return 1; /* local file system--preen is okay */ return 0; /* might be mounted on another node--not guaranteed safe */ } /** * gfs2_recover_journal - recovery a given journal * @ip: the journal incore inode * j: which journal to check * preen: Was preen (-a or -p) specified? * force_check: Was -f specified to force the check? * @was_clean: if the journal was originally clean, this is set to 1. * if the journal was dirty from the start, this is set to 0. * * Acquire the journal's lock, check to see if the journal is clean, and * do recovery if necessary. * * Returns: errno */ static int gfs2_recover_journal(struct gfs2_inode *ip, int j, int preen, int force_check, int *was_clean) { struct gfs2_sbd *sdp = ip->i_sbd; struct gfs2_log_header head; unsigned int pass; int error; *was_clean = 0; log_info( _("jid=%u: Looking at journal...\n"), j); osi_list_init(&sd_revoke_list); error = gfs2_find_jhead(ip, &head); if (error) { if (opts.no) { log_err( _("Journal #%d (\"journal%d\") is corrupt\n"),j+1, j); log_err( _("Not fixing it due to the -n option.\n")); goto out; } if (!preen_is_safe(sdp, preen, force_check)) { log_err(_("Journal #%d (\"journal%d\") is corrupt.\n"), j+1, j); log_err(_("I'm not fixing it because it may be unsafe:\n" "Locking protocol is not lock_nolock and " "the -a or -p option was specified.\n")); log_err(_("Please make sure no node has the file system " "mounted then rerun fsck.gfs2 manually " "without -a or -p.\n")); goto out; } if (!query( _("\nJournal #%d (\"journal%d\") is " "corrupt. Okay to repair it? 
(y/n)"), j+1, j)) { log_err( _("jid=%u: The journal was not repaired.\n"), j); goto out; } log_info( _("jid=%u: Repairing journal...\n"), j); error = fix_journal_seq_no(ip); if (error) { log_err( _("jid=%u: Unable to fix the bad journal.\n"), j); goto out; } error = gfs2_find_jhead(ip, &head); if (error) { log_err( _("jid=%u: Unable to fix the bad journal.\n"), j); goto out; } log_err( _("jid=%u: The journal was successfully fixed.\n"), j); } if (head.lh_flags & GFS2_LOG_HEAD_UNMOUNT) { log_info( _("jid=%u: Journal is clean.\n"), j); *was_clean = 1; return 0; } if (opts.no) { log_err(_("Journal #%d (\"journal%d\") is dirty\n"),j+1, j); log_err(_("not replaying due to the -n option.\n")); goto out; } if (!preen_is_safe(sdp, preen, force_check)) { log_err( _("Journal #%d (\"journal%d\") is dirty\n"), j+1, j); log_err( _("I'm not replaying it because it may be unsafe:\n" "Locking protocol is not lock_nolock and " "the -a or -p option was specified.\n")); log_err( _("Please make sure no node has the file system " "mounted then rerun fsck.gfs2 manually " "without -a or -p.\n")); error = FSCK_ERROR; goto out; } if (!query( _("\nJournal #%d (\"journal%d\") is dirty. Okay to " "replay it? (y/n)"), j+1, j)) goto reinit; log_info( _("jid=%u: Replaying journal...\n"), j); sd_found_jblocks = sd_replayed_jblocks = 0; sd_found_metablocks = sd_replayed_metablocks = 0; sd_found_revokes = 0; sd_replay_tail = head.lh_tail; for (pass = 0; pass < 2; pass++) { error = foreach_descriptor(ip, head.lh_tail, head.lh_blkno, pass); if (error) goto out; } log_info( _("jid=%u: Found %u revoke tags\n"), j, sd_found_revokes); gfs2_revoke_clean(sdp); error = clean_journal(ip, &head); if (error) goto out; log_err( _("jid=%u: Replayed %u of %u journaled data blocks\n"), j, sd_replayed_jblocks, sd_found_jblocks); log_err( _("jid=%u: Replayed %u of %u metadata blocks\n"), j, sd_replayed_metablocks, sd_found_metablocks); /* Check for errors and give them the option to reinitialize the journal. 
*/ out: if (!error) { log_info( _("jid=%u: Done\n"), j); return 0; } log_info( _("jid=%u: Failed\n"), j); reinit: if (query( _("Do you want to clear the journal instead? (y/n)"))) error = write_journal(sdp, j, sdp->md.journal[j]->i_di.di_size / sdp->sd_sb.sb_bsize); else log_err( _("jid=%u: journal not cleared.\n"), j); return error; } /* * replay_journals - replay the journals * sdp: the super block * preen: Was preen (-a or -p) specified? * force_check: Was -f specified to force the check? * @clean_journals - set to the number of clean journals we find * * There should be a flag to the fsck to enable/disable this * feature. The fsck falls back to clearing the journal if an * inconsistency is found, but only for the bad journal. * * Returns: 0 on success, -1 on failure */ int replay_journals(struct gfs2_sbd *sdp, int preen, int force_check, int *clean_journals) { int i; int clean = 0, dirty_journals = 0, error = 0, gave_msg = 0; *clean_journals = 0; sdp->jsize = GFS2_DEFAULT_JSIZE; for(i = 0; i < sdp->md.journals; i++) { if (!sdp->md.journal[i]) { log_err(_("File system journal \"journal%d\" is " "missing: pass1 will try to recreate it.\n"), i); continue; } if (!error) { uint64_t jsize = sdp->md.journal[i]->i_di.di_size / (1024 * 1024); if (sdp->jsize == GFS2_DEFAULT_JSIZE && jsize && jsize != sdp->jsize) sdp->jsize = jsize; error = gfs2_recover_journal(sdp->md.journal[i], i, preen, force_check, &clean); if (!clean) dirty_journals++; if (!gave_msg && dirty_journals == 1 && !opts.no && preen_is_safe(sdp, preen, force_check)) { gave_msg = 1; log_notice( _("Recovering journals (this may " "take a while)\n")); } *clean_journals += clean; } } /* Sync the buffers to disk so we get a fresh start. */ fsync(sdp->device_fd); return error; } /* * ji_update - fill in journal info * sdp: the incore superblock pointer * * Given the inode for the journal index, read in all * the journal inodes. 
* * Returns: 0 on success, -1 on failure */ int ji_update(struct gfs2_sbd *sdp) { struct gfs2_inode *jip, *ip = sdp->md.jiinode; char journal_name[JOURNAL_NAME_SIZE]; int i, error; char buf[sizeof(struct gfs_jindex)]; struct gfs_jindex ji; if (!ip) { log_crit("Journal index inode not found.\n"); return -1; } /* The per_node directory will have 3 directory entries per node, plus two for "." and "..". So we subtract the 2 and divide by 3. If per_node is missing or damaged, we have to trust jindex has the correct number of entries. */ if (sdp->gfs1) sdp->md.journals = ip->i_di.di_size / sizeof(struct gfs_jindex); else if (sdp->md.pinode) /* if per_node was read in properly */ sdp->md.journals = (sdp->md.pinode->i_di.di_entries - 2) / 3; else sdp->md.journals = ip->i_di.di_entries - 2; if (!(sdp->md.journal = calloc(sdp->md.journals, sizeof(struct gfs2_inode *)))) { log_err(_("Unable to allocate journal index\n")); return -1; } memset(journal_name, 0, sizeof(*journal_name)); for (i = 0; i < sdp->md.journals; i++) { if (sdp->gfs1) { error = gfs2_readi(ip, buf, i * sizeof(struct gfs_jindex), sizeof(struct gfs_jindex)); if (!error) break; if (error != sizeof(struct gfs_jindex)){ log_err(_("An error occurred while reading the" " journal index file.\n")); return -1; } gfs_jindex_in(&ji, buf); sdp->md.journal[i] = lgfs2_inode_read(sdp, ji.ji_addr); if (sdp->md.journal[i] == NULL) return -1; } else { /* FIXME check snprintf return code */ snprintf(journal_name, JOURNAL_NAME_SIZE, "journal%u", i); gfs2_lookupi(sdp->md.jiinode, journal_name, strlen(journal_name), &jip); sdp->md.journal[i] = jip; } } return 0; } gfs2-utils/gfs2/fsck/fs_recovery.h0000664000175000017500000000051512110647577016030 0ustar andyandy#ifndef __FS_RECOVERY_H__ #define __FS_RECOVERY_H__ #include "libgfs2.h" extern int replay_journals(struct gfs2_sbd *sdp, int preen, int force_check, int *clean_journals); extern int preen_is_safe(struct gfs2_sbd *sdp, int preen, int force_check); extern int 
ji_update(struct gfs2_sbd *sdp); #endif /* __FS_RECOVERY_H__ */ gfs2-utils/gfs2/fsck/fsck.h0000664000175000017500000001227412171730231014420 0ustar andyandy#ifndef _FSCK_H #define _FSCK_H #include "libgfs2.h" #include "osi_tree.h" #define FSCK_HASH_SHIFT (13) #define FSCK_HASH_SIZE (1 << FSCK_HASH_SHIFT) #define FSCK_HASH_MASK (FSCK_HASH_SIZE - 1) #define query(fmt, args...) fsck_query(fmt, ##args) /* * Exit codes used by fsck-type programs * Copied from e2fsck's e2fsck.h */ #define FSCK_OK 0 /* No errors */ #define FSCK_NONDESTRUCT 1 /* File system errors corrected */ #define FSCK_REBOOT 2 /* System should be rebooted */ #define FSCK_UNCORRECTED 4 /* File system errors left uncorrected */ #define FSCK_ERROR 8 /* Operational error */ #define FSCK_USAGE 16 /* Usage or syntax error */ #define FSCK_CANCELED 32 /* Aborted with a signal or ^C */ #define FSCK_LIBRARY 128 /* Shared library error */ #define BAD_POINTER_TOLERANCE 10 /* How many bad pointers is too many? */ struct inode_info { struct osi_node node; struct gfs2_inum di_num; uint32_t di_nlink; /* the number of links the inode * thinks it has */ uint32_t counted_links; /* the number of links we've found */ }; struct dir_info { struct osi_node node; struct gfs2_inum dinode; uint64_t treewalk_parent; struct gfs2_inum dotdot_parent; uint8_t checked:1; }; struct dir_status { uint8_t dotdir:1; uint8_t dotdotdir:1; uint8_t q; uint32_t entry_count; }; struct duptree { struct osi_node node; int first_ref_found; /* Has the original reference been found? */ int refs; uint64_t block; osi_list_t ref_inode_list; /* list of inodes referencing a dup block */ osi_list_t ref_invinode_list; /* list of invalid inodes referencing */ }; enum dup_ref_type { ref_as_data = 0, /* dinode references this block as a data block */ ref_as_meta = 1, /* dinode references this block as a metadata block */ ref_as_ea = 2, /* dinode references this block as an extended attr */ ref_is_inode= 3, /* The reference is itself a dinode. 
In other words, it's a dinode, not pointed to as data or metadata */ ref_types = 4, }; struct inode_with_dups { osi_list_t list; uint64_t block_no; int dup_count; int reftypecount[ref_types]; uint64_t parent; char *name; }; enum rgindex_trust_level { /* how far can we trust our RG index? */ blind_faith = 0, /* We'd like to trust the rgindex. We always used to before bz 179069. This should cover most cases. */ ye_of_little_faith = 1, /* The rindex seems trustworthy but there's rg damage that need to be fixed. */ open_minded = 2, /* At least 1 RG is corrupt. Try to calculate what it should be, in a perfect world where our RGs are all on even boundaries. Blue sky. Chirping birds. */ distrust = 3, /* The world isn't perfect, our RGs are not on nice neat boundaries. The fs must have been messed with by gfs2_grow or something. Count the RGs by hand. */ indignation = 4 /* Not only do we have corruption, but the rgrps aren't on even boundaries, so this file system must have been converted from gfs2_convert. */ }; extern struct gfs2_inode *fsck_load_inode(struct gfs2_sbd *sdp, uint64_t block); extern struct gfs2_inode *fsck_inode_get(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh); extern void fsck_inode_put(struct gfs2_inode **ip); extern int initialize(struct gfs2_sbd *sdp, int force_check, int preen, int *all_clean); extern void destroy(struct gfs2_sbd *sdp); extern int pass1(struct gfs2_sbd *sdp); extern int pass1b(struct gfs2_sbd *sdp); extern int pass1c(struct gfs2_sbd *sdp); extern int pass2(struct gfs2_sbd *sdp); extern int pass3(struct gfs2_sbd *sdp); extern int pass4(struct gfs2_sbd *sdp); extern int pass5(struct gfs2_sbd *sdp); extern int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count, int *sane); extern int fsck_query(const char *format, ...) 
__attribute__((format(printf,1,2))); extern struct dir_info *dirtree_find(uint64_t block); extern void dup_delete(struct duptree *dt); extern void dirtree_delete(struct dir_info *b); /* FIXME: Hack to get this going for pass2 - this should be pulled out * of pass1 and put somewhere else... */ struct dir_info *dirtree_insert(struct gfs2_inum inum); struct gfs2_options { char *device; unsigned int yes:1; unsigned int no:1; unsigned int query:1; }; extern struct gfs2_options opts; extern struct gfs2_inode *lf_dip; /* Lost and found directory inode */ extern int lf_was_created; extern struct gfs2_bmap *bl; extern uint64_t last_fs_block, last_reported_block; extern int64_t last_reported_fblock; extern int skip_this_pass, fsck_abort; extern int errors_found, errors_corrected; extern uint64_t last_data_block; extern uint64_t first_data_block; extern struct osi_root dup_blocks; extern struct osi_root dirtree; extern struct osi_root inodetree; extern int dups_found; /* How many duplicate references have we found? */ extern int dups_found_first; /* How many duplicates have we found the original reference for? 
*/ extern struct gfs_sb *sbd1; static inline int valid_block(struct gfs2_sbd *sdp, uint64_t blkno) { return !((blkno > sdp->fssize) || (blkno <= sdp->sb_addr) || (lgfs2_get_bitmap(sdp, blkno, NULL) < 0)); } #endif /* _FSCK_H */ gfs2-utils/gfs2/fsck/initialize.c0000664000175000017500000013346012171771031015632 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include #define _(String) gettext(String) #include "libgfs2.h" #include "fsck.h" #include "util.h" #include "fs_recovery.h" #include "metawalk.h" #include "inode_hash.h" #define CLEAR_POINTER(x) \ if (x) { \ free(x); \ x = NULL; \ } #define HIGHEST_BLOCK 0xffffffffffffffff static int was_mounted_ro = 0; static uint64_t possible_root = HIGHEST_BLOCK; static struct master_dir fix_md; static unsigned long long blks_2free = 0; extern int sb_fixed; /** * block_mounters * * Change the lock protocol so nobody can mount the fs * */ static int block_mounters(struct gfs2_sbd *sdp, int block_em) { if (block_em) { /* verify it starts with lock_ */ if (!strncmp(sdp->sd_sb.sb_lockproto, "lock_", 5)) { /* Change lock_ to fsck_ */ memcpy(sdp->sd_sb.sb_lockproto, "fsck_", 5); } /* FIXME: Need to do other verification in the else * case */ } else { /* verify it starts with fsck_ */ /* verify it starts with lock_ */ if (!strncmp(sdp->sd_sb.sb_lockproto, "fsck_", 5)) { /* Change fsck_ to lock_ */ memcpy(sdp->sd_sb.sb_lockproto, "lock_", 5); } } if (write_sb(sdp)) { stack; return -1; } return 0; } static void gfs2_dup_free(void) { struct osi_node *n; struct duptree *dt; while ((n = osi_first(&dup_blocks))) { dt = (struct duptree *)n; dup_delete(dt); } } static void gfs2_dirtree_free(void) { struct osi_node *n; struct dir_info *dt; while ((n = osi_first(&dirtree))) { dt = (struct dir_info *)n; dirtree_delete(dt); } } static void gfs2_inodetree_free(void) { struct osi_node *n; struct inode_info *dt; while ((n = osi_first(&inodetree))) { dt 
= (struct inode_info *)n; inodetree_delete(dt); } } /* * empty_super_block - free all structures in the super block * sdp: the in-core super block * * This function frees all allocated structures within the * super block. It does not free the super block itself. * * Returns: Nothing */ static void empty_super_block(struct gfs2_sbd *sdp) { log_info( _("Freeing buffers.\n")); gfs2_rgrp_free(&sdp->rgtree); if (bl) gfs2_bmap_destroy(sdp, bl); gfs2_inodetree_free(); gfs2_dirtree_free(); gfs2_dup_free(); } /** * set_block_ranges * @sdp: superblock * * Uses info in rgrps and jindex to determine boundaries of the * file system. * * Returns: 0 on success, -1 on failure */ static int set_block_ranges(struct gfs2_sbd *sdp) { struct osi_node *n, *next = NULL; struct rgrp_tree *rgd; struct gfs2_rindex *ri; char buf[sdp->sd_sb.sb_bsize]; uint64_t rmax = 0; uint64_t rmin = 0; int error; log_info( _("Setting block ranges...\n")); for (n = osi_first(&sdp->rgtree); n; n = next) { next = osi_next(n); rgd = (struct rgrp_tree *)n; ri = &rgd->ri; if (ri->ri_data0 + ri->ri_data && ri->ri_data0 + ri->ri_data - 1 > rmax) rmax = ri->ri_data0 + ri->ri_data - 1; if (!rmin || ri->ri_data0 < rmin) rmin = ri->ri_data0; } last_fs_block = rmax; if (last_fs_block > 0xffffffff && sizeof(unsigned long) <= 4) { log_crit( _("This file system is too big for this computer to handle.\n")); log_crit( _("Last fs block = 0x%llx, but sizeof(unsigned long) is %zu bytes.\n"), (unsigned long long)last_fs_block, sizeof(unsigned long)); goto fail; } last_data_block = rmax; first_data_block = rmin; if (fsck_lseek(sdp->device_fd, (last_fs_block * sdp->sd_sb.sb_bsize))){ log_crit( _("Can't seek to last block in file system: %llu" " (0x%llx)\n"), (unsigned long long)last_fs_block, (unsigned long long)last_fs_block); goto fail; } memset(buf, 0, sdp->sd_sb.sb_bsize); error = read(sdp->device_fd, buf, sdp->sd_sb.sb_bsize); if (error != sdp->sd_sb.sb_bsize){ log_crit( _("Can't read last block in file system (error %u), " 
"last_fs_block: %llu (0x%llx)\n"), error, (unsigned long long)last_fs_block, (unsigned long long)last_fs_block); goto fail; } return 0; fail: return -1; } /** * check_rgrp_integrity - verify a rgrp free block count against the bitmap */ static void check_rgrp_integrity(struct gfs2_sbd *sdp, struct rgrp_tree *rgd, int *fixit, int *this_rg_fixed, int *this_rg_bad, int *this_rg_cleaned) { uint32_t rg_free, rg_reclaimed, rg_unlinked; int rgb, x, y, off, bytes_to_check, total_bytes_to_check, asked = 0; unsigned int state; struct gfs_rgrp *gfs1rg = (struct gfs_rgrp *)&rgd->rg; uint64_t diblock; struct gfs2_buffer_head *bh; rg_free = rg_reclaimed = rg_unlinked = 0; total_bytes_to_check = rgd->ri.ri_bitbytes; *this_rg_fixed = *this_rg_bad = *this_rg_cleaned = 0; diblock = rgd->ri.ri_data0; for (rgb = 0; rgb < rgd->ri.ri_length; rgb++){ /* Count up the free blocks in the bitmap */ off = (rgb) ? sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_rgrp); if (total_bytes_to_check <= sdp->bsize - off) bytes_to_check = total_bytes_to_check; else bytes_to_check = sdp->bsize - off; total_bytes_to_check -= bytes_to_check; for (x = 0; x < bytes_to_check; x++) { unsigned char *byte; byte = (unsigned char *)&rgd->bh[rgb]->b_data[off + x]; if (*byte == 0x55) { diblock += GFS2_NBBY; continue; } if (*byte == 0x00) { diblock += GFS2_NBBY; rg_free += GFS2_NBBY; continue; } for (y = 0; y < GFS2_NBBY; y++) { state = (*byte >> (GFS2_BIT_SIZE * y)) & GFS2_BIT_MASK; if (state == GFS2_BLKST_USED) { diblock++; continue; } if (state == GFS2_BLKST_DINODE) { diblock++; continue; } if (state == GFS2_BLKST_FREE) { diblock++; rg_free++; continue; } /* GFS2_BLKST_UNLINKED */ if (sdp->gfs1) log_info(_("Free metadata block 0x%llx" " found.\n"), (unsigned long long)diblock); else log_info(_("Unlinked dinode 0x%llx " "found.\n"), (unsigned long long)diblock); if (!asked) { char msg[256]; asked = 1; sprintf(msg, _("Okay to reclaim free " "metadata in resource group " "%lld (0x%llx)? 
(y/n)"), (unsigned long long)rgd->ri.ri_addr, (unsigned long long)rgd->ri.ri_addr); if (query("%s", msg)) *fixit = 1; } if (!(*fixit)) { rg_unlinked++; diblock++; continue; } *byte &= ~(GFS2_BIT_MASK << (GFS2_BIT_SIZE * y)); bmodified(rgd->bh[rgb]); rg_reclaimed++; rg_free++; rgd->rg.rg_free++; if (sdp->gfs1 && gfs1rg->rg_freemeta) gfs1rg->rg_freemeta--; log_info(_("Free metadata block %lld (0x%llx) " "reclaimed.\n"), (unsigned long long)diblock, (unsigned long long)diblock); bh = bread(sdp, diblock); if (!gfs2_check_meta(bh, GFS2_METATYPE_DI)) { struct gfs2_inode *ip = fsck_inode_get(sdp, bh); if (ip->i_di.di_blocks > 1) { blks_2free += ip->i_di.di_blocks - 1; log_info(_("%lld blocks " "(total) may need " "to be freed in " "pass 5.\n"), blks_2free); } fsck_inode_put(&ip); } brelse(bh); diblock++; } } } /* The unlinked blocks we reclaim shouldn't be considered errors, since we're just reclaiming them as a courtesy. If we already got permission to reclaim them, we adjust the rgrp counts accordingly. That way, only "real" rgrp count inconsistencies will be reported. */ if (rg_reclaimed && *fixit) { if (sdp->gfs1) gfs_rgrp_out((struct gfs_rgrp *)&rgd->rg, rgd->bh[0]); else gfs2_rgrp_out_bh(&rgd->rg, rgd->bh[0]); *this_rg_cleaned = 1; log_info( _("The rgrp at %lld (0x%llx) was cleaned of %d " "free metadata blocks.\n"), (unsigned long long)rgd->ri.ri_addr, (unsigned long long)rgd->ri.ri_addr, rg_reclaimed); } if (rgd->rg.rg_free != rg_free) { *this_rg_bad = 1; *this_rg_cleaned = 0; log_err( _("Error: resource group %lld (0x%llx): " "free space (%d) does not match bitmap (%d)\n"), (unsigned long long)rgd->ri.ri_addr, (unsigned long long)rgd->ri.ri_addr, rgd->rg.rg_free, rg_free); if (query( _("Fix the rgrp free blocks count? 
(y/n)"))) { rgd->rg.rg_free = rg_free; if (sdp->gfs1) gfs_rgrp_out((struct gfs_rgrp *)&rgd->rg, rgd->bh[0]); else gfs2_rgrp_out_bh(&rgd->rg, rgd->bh[0]); *this_rg_fixed = 1; log_err( _("The rgrp was fixed.\n")); } else log_err( _("The rgrp was not fixed.\n")); } if (sdp->gfs1 && gfs1rg->rg_freemeta != rg_unlinked) { *this_rg_bad = 1; *this_rg_cleaned = 0; log_err( _("Error: resource group %lld (0x%llx): " "free meta (%d) does not match bitmap (%d)\n"), (unsigned long long)rgd->ri.ri_addr, (unsigned long long)rgd->ri.ri_addr, gfs1rg->rg_freemeta, rg_unlinked); if (query( _("Fix the rgrp free meta blocks count? (y/n)"))) { gfs1rg->rg_freemeta = rg_unlinked; gfs_rgrp_out((struct gfs_rgrp *)&rgd->rg, rgd->bh[0]); *this_rg_fixed = 1; log_err( _("The rgrp was fixed.\n")); } else log_err( _("The rgrp was not fixed.\n")); } /* else { log_debug( _("Resource group %lld (0x%llx) free space " "is consistent: free: %d reclaimed: %d\n"), (unsigned long long)rgd->ri.ri_addr, (unsigned long long)rgd->ri.ri_addr, rg_free, rg_reclaimed); }*/ } /** * check_rgrps_integrity - verify rgrp consistency * Note: We consider an rgrp "cleaned" if the unlinked meta blocks are * cleaned, so not quite "bad" and not quite "good" but rewritten anyway. 
* * Returns: 0 on success, 1 if errors were detected */ static void check_rgrps_integrity(struct gfs2_sbd *sdp) { struct osi_node *n, *next = NULL; int rgs_good = 0, rgs_bad = 0, rgs_fixed = 0, rgs_cleaned = 0; int was_bad = 0, was_fixed = 0, was_cleaned = 0; struct rgrp_tree *rgd; int reclaim_unlinked = 0; log_info( _("Checking the integrity of all resource groups.\n")); for (n = osi_first(&sdp->rgtree); n; n = next) { next = osi_next(n); rgd = (struct rgrp_tree *)n; if (fsck_abort) return; check_rgrp_integrity(sdp, rgd, &reclaim_unlinked, &was_fixed, &was_bad, &was_cleaned); if (was_fixed) rgs_fixed++; if (was_cleaned) rgs_cleaned++; else if (was_bad) rgs_bad++; else rgs_good++; } if (rgs_bad || rgs_cleaned) { log_err( _("RGs: Consistent: %d Cleaned: %d Inconsistent: " "%d Fixed: %d Total: %d\n"), rgs_good, rgs_cleaned, rgs_bad, rgs_fixed, rgs_good + rgs_bad + rgs_cleaned); if (rgs_cleaned && blks_2free) log_err(_("%lld blocks may need to be freed in pass 5 " "due to the cleaned resource groups.\n"), blks_2free); } } /** * rebuild_master - rebuild a destroyed master directory */ static int rebuild_master(struct gfs2_sbd *sdp) { struct gfs2_inum inum; struct gfs2_buffer_head *bh; int err = 0; log_err(_("The system master directory seems to be destroyed.\n")); if (!query(_("Okay to rebuild it? 
(y/n)"))) { log_err(_("System master not rebuilt; aborting.\n")); return -1; } log_err(_("Trying to rebuild the master directory.\n")); inum.no_formal_ino = sdp->md.next_inum++; inum.no_addr = sdp->sd_sb.sb_master_dir.no_addr; bh = init_dinode(sdp, &inum, S_IFDIR | 0755, GFS2_DIF_SYSTEM, &inum); sdp->master_dir = lgfs2_inode_get(sdp, bh); if (sdp->master_dir == NULL) { log_crit(_("Error reading master: %s\n"), strerror(errno)); return -1; } sdp->master_dir->bh_owned = 1; if (fix_md.jiinode) { inum.no_formal_ino = sdp->md.next_inum++; inum.no_addr = fix_md.jiinode->i_di.di_num.no_addr; err = dir_add(sdp->master_dir, "jindex", 6, &inum, IF2DT(S_IFDIR | 0700)); if (err) { log_crit(_("Error %d adding jindex directory\n"), errno); exit(FSCK_ERROR); } sdp->master_dir->i_di.di_nlink++; } else { err = build_jindex(sdp); if (err) { log_crit(_("Error %d building jindex\n"), err); exit(FSCK_ERROR); } } if (fix_md.pinode) { inum.no_formal_ino = sdp->md.next_inum++; inum.no_addr = fix_md.pinode->i_di.di_num.no_addr; err = dir_add(sdp->master_dir, "per_node", 8, &inum, IF2DT(S_IFDIR | 0700)); if (err) { log_crit(_("Error %d adding per_node directory\n"), errno); exit(FSCK_ERROR); } sdp->master_dir->i_di.di_nlink++; } else { err = build_per_node(sdp); if (err) { log_crit(_("Error %d building per_node directory\n"), err); exit(FSCK_ERROR); } } if (fix_md.inum) { inum.no_formal_ino = sdp->md.next_inum++; inum.no_addr = fix_md.inum->i_di.di_num.no_addr; err = dir_add(sdp->master_dir, "inum", 4, &inum, IF2DT(S_IFREG | 0600)); if (err) { log_crit(_("Error %d adding inum inode\n"), errno); exit(FSCK_ERROR); } } else { err = build_inum(sdp); if (err) { log_crit(_("Error %d building inum inode\n"), err); exit(FSCK_ERROR); } gfs2_lookupi(sdp->master_dir, "inum", 4, &sdp->md.inum); } if (fix_md.statfs) { inum.no_formal_ino = sdp->md.next_inum++; inum.no_addr = fix_md.statfs->i_di.di_num.no_addr; err = dir_add(sdp->master_dir, "statfs", 6, &inum, IF2DT(S_IFREG | 0600)); if (err) { 
log_crit(_("Error %d adding statfs inode\n"), errno); exit(FSCK_ERROR); } } else { err = build_statfs(sdp); if (err) { log_crit(_("Error %d building statfs inode\n"), err); exit(FSCK_ERROR); } gfs2_lookupi(sdp->master_dir, "statfs", 6, &sdp->md.statfs); } if (fix_md.riinode) { inum.no_formal_ino = sdp->md.next_inum++; inum.no_addr = fix_md.riinode->i_di.di_num.no_addr; err = dir_add(sdp->master_dir, "rindex", 6, &inum, IF2DT(S_IFREG | 0600)); if (err) { log_crit(_("Error %d adding rindex inode\n"), errno); exit(FSCK_ERROR); } } else { err = build_rindex(sdp); if (err) { log_crit(_("Error %d building rindex inode\n"), err); exit(FSCK_ERROR); } } if (fix_md.qinode) { inum.no_formal_ino = sdp->md.next_inum++; inum.no_addr = fix_md.qinode->i_di.di_num.no_addr; err = dir_add(sdp->master_dir, "quota", 5, &inum, IF2DT(S_IFREG | 0600)); if (err) { log_crit(_("Error %d adding quota inode\n"), errno); exit(FSCK_ERROR); } } else { err = build_quota(sdp); if (err) { log_crit(_("Error %d building quota inode\n"), err); exit(FSCK_ERROR); } } log_err(_("Master directory rebuilt.\n")); inode_put(&sdp->md.inum); inode_put(&sdp->md.statfs); inode_put(&sdp->master_dir); return 0; } /** * lookup_per_node - Make sure the per_node directory is read in * * This function is used to read in the per_node directory. It is called * twice. The first call tries to read in the dinode early on. That ensures * that if any journals are missing, we can figure out the number of journals * from per_node. However, we unfortunately can't rebuild per_node at that * point in time because our resource groups aren't read in yet. * The second time it's called is much later when we can rebuild it. 
* * allow_rebuild: 0 if rebuilds are not allowed * 1 if rebuilds are allowed */ static void lookup_per_node(struct gfs2_sbd *sdp, int allow_rebuild) { if (sdp->md.pinode) return; gfs2_lookupi(sdp->master_dir, "per_node", 8, &sdp->md.pinode); if (sdp->md.pinode) return; if (!allow_rebuild) { log_err( _("The gfs2 system per_node directory " "inode is missing, so we might not be \nable to " "rebuild missing journals this run.\n")); return; } if (query( _("The gfs2 system per_node directory " "inode is missing. Okay to rebuild it? (y/n) "))) { int err; err = build_per_node(sdp); if (err) { log_crit(_("Error %d rebuilding per_node directory\n"), err); exit(FSCK_ERROR); } } gfs2_lookupi(sdp->master_dir, "per_node", 8, &sdp->md.pinode); if (!sdp->md.pinode) { log_err( _("Unable to rebuild per_node; aborting.\n")); exit(FSCK_ERROR); } } /** * fetch_rgrps - fetch the resource groups from disk, and check their integrity */ static int fetch_rgrps(struct gfs2_sbd *sdp) { enum rgindex_trust_level trust_lvl; int rgcount, sane = 1; const char *level_desc[] = { _("Checking if all rgrp and rindex values are good"), _("Checking if rindex values may be easily repaired"), _("Calculating where the rgrps should be if evenly spaced"), _("Trying to rebuild rindex assuming evenly spaced rgrps"), _("Trying to rebuild rindex assuming unevenly spaced rgrps"), }; const char *fail_desc[] = { _("Some damage was found; we need to take remedial measures"), _("rindex is unevenly spaced: either gfs1-style or corrupt"), _("rindex calculations don't match: uneven rgrp boundaries"), _("Too many rgrp misses: rgrps must be unevenly spaced"), _("Too much damage found: we cannot rebuild this rindex"), }; /******************************************************************* ******** Validate and read in resource group information ******** *******************************************************************/ log_warn( _("Validating Resource Group index.\n")); for (trust_lvl = blind_faith; trust_lvl <= 
indignation; trust_lvl++) { int ret = 0; log_warn( _("Level %d rgrp check: %s.\n"), trust_lvl + 1, level_desc[trust_lvl]); if ((rg_repair(sdp, trust_lvl, &rgcount, &sane) == 0) && ((ret = ri_update(sdp, 0, &rgcount, &sane)) == 0)) { log_warn( _("(level %d passed)\n"), trust_lvl + 1); break; } else { if (ret < 0) log_err( _("(level %d failed: %s)\n"), trust_lvl + 1, fail_desc[trust_lvl]); else log_err( _("(level %d failed at block %lld " "(0x%llx): %s)\n"), trust_lvl + 1, (unsigned long long)ret, (unsigned long long)ret, fail_desc[trust_lvl]); } if (fsck_abort) break; } if (trust_lvl > indignation) { log_err( _("Resource Group recovery impossible; I can't fix " "this file system.\n")); return -1; } log_info( _("%u resource groups found.\n"), rgcount); check_rgrps_integrity(sdp); return 0; } /** * init_system_inodes * * Returns: 0 on success, -1 on failure */ static int init_system_inodes(struct gfs2_sbd *sdp) { uint64_t inumbuf = 0; char *buf; struct gfs2_statfs_change sc; uint64_t addl_mem_needed; int err; /******************************************************************* ****************** Initialize important inodes ****************** *******************************************************************/ log_info( _("Initializing special inodes...\n")); /* Get root dinode */ sdp->md.rooti = lgfs2_inode_read(sdp, sdp->sd_sb.sb_root_dir.no_addr); if (sdp->md.rooti == NULL) return -1; err = fetch_rgrps(sdp); if (err) return err; /******************************************************************* ***************** Initialize more system inodes ***************** *******************************************************************/ if (!sdp->gfs1) { /* Look for "inum" entry in master dinode */ gfs2_lookupi(sdp->master_dir, "inum", 4, &sdp->md.inum); if (!sdp->md.inum) { if (!query( _("The gfs2 system inum inode is missing. " "Okay to rebuild it? 
(y/n) "))) { log_err( _("fsck.gfs2 cannot continue without " "a valid inum file; aborting.\n")); goto fail; } err = build_inum(sdp); if (err) { log_crit(_("Error %d rebuilding inum inode\n"), err); exit(FSCK_ERROR); } gfs2_lookupi(sdp->master_dir, "inum", 4, &sdp->md.inum); if (!sdp->md.inum) { log_crit("System inum inode was not rebuilt. " "Aborting.\n"); goto fail; } } /* Read inum entry into buffer */ err = gfs2_readi(sdp->md.inum, &inumbuf, 0, sdp->md.inum->i_di.di_size); if (err != sdp->md.inum->i_di.di_size) { log_crit(_("Error %d reading system inum inode. " "Aborting.\n"), err); goto fail; } /* call gfs2_inum_range_in() to retrieve range */ sdp->md.next_inum = be64_to_cpu(inumbuf); } if (sdp->gfs1) { sdp->md.statfs = lgfs2_inode_read(sdp, sbd1->sb_license_di.no_addr); if (sdp->md.statfs == NULL) { log_crit(_("Error reading statfs inode: %s\n"), strerror(errno)); goto fail; } } else gfs2_lookupi(sdp->master_dir, "statfs", 6, &sdp->md.statfs); if (!sdp->gfs1 && !sdp->md.statfs) { if (!query( _("The gfs2 system statfs inode is missing. " "Okay to rebuild it? (y/n) "))) { log_err( _("fsck.gfs2 cannot continue without a valid " "statfs file; aborting.\n")); goto fail; } err = build_statfs(sdp); if (err) { log_crit(_("Error %d rebuilding statfs inode\n"), err); exit(FSCK_ERROR); } gfs2_lookupi(sdp->master_dir, "statfs", 6, &sdp->md.statfs); if (!sdp->md.statfs) { log_err( _("Rebuild of statfs system file failed.")); log_err( _("fsck.gfs2 cannot continue without " "a valid statfs file; aborting.\n")); goto fail; } do_init_statfs(sdp); } if (sdp->md.statfs->i_di.di_size) { buf = malloc(sdp->md.statfs->i_di.di_size); if (buf) { err = gfs2_readi(sdp->md.statfs, buf, 0, sdp->md.statfs->i_di.di_size); if (err != sdp->md.statfs->i_di.di_size) { log_crit(_("Error %d reading statfs file. 
" "Aborting.\n"), err); free(buf); goto fail; } /* call gfs2_inum_range_in() to retrieve range */ gfs2_statfs_change_in(&sc, buf); free(buf); } } if (sdp->gfs1) { sdp->md.qinode = lgfs2_inode_read(sdp, sbd1->sb_quota_di.no_addr); if (sdp->md.qinode == NULL) { log_crit(_("Error reading quota inode: %s\n"), strerror(errno)); goto fail; } } else gfs2_lookupi(sdp->master_dir, "quota", 5, &sdp->md.qinode); if (!sdp->gfs1 && !sdp->md.qinode) { if (!query( _("The gfs2 system quota inode is missing. " "Okay to rebuild it? (y/n) "))) { log_crit("System quota inode was not " "rebuilt. Aborting.\n"); goto fail; } err = build_quota(sdp); if (err) { log_crit(_("Error %d rebuilding quota inode\n"), err); exit(FSCK_ERROR); } gfs2_lookupi(sdp->master_dir, "quota", 5, &sdp->md.qinode); if (!sdp->md.qinode) { log_crit("Unable to rebuild system quota file " "inode. Aborting.\n"); goto fail; } } /* Try to lookup the per_node inode. If it was missing, it is now safe to rebuild it. */ if (!sdp->gfs1) lookup_per_node(sdp, 1); /******************************************************************* ******* Now, set boundary fields in the super block ************* *******************************************************************/ if (set_block_ranges(sdp)){ log_err( _("Unable to determine the boundaries of the" " file system.\n")); goto fail; } bl = gfs2_bmap_create(sdp, last_fs_block+1, &addl_mem_needed); if (!bl) { log_crit( _("This system doesn't have enough memory and swap space to fsck this file system.\n")); log_crit( _("Additional memory needed is approximately: %lluMB\n"), (unsigned long long)(addl_mem_needed / 1048576ULL)); log_crit( _("Please increase your swap space by that amount and run gfs2_fsck again.\n")); goto fail; } return 0; fail: empty_super_block(sdp); return -1; } /** * is_journal_copy - Is this a "real" dinode or a copy inside a journal? * A real dinode will be located at the block number in its no_addr. 
* A journal-copy will be at a different block (inside the journal). */ static int is_journal_copy(struct gfs2_inode *ip, struct gfs2_buffer_head *bh) { if (ip->i_di.di_num.no_addr == bh->b_blocknr) return 0; return 1; /* journal copy */ } /** * peruse_system_dinode - process a system dinode * * This function looks at a system dinode and tries to figure out which * dinode it is: statfs, inum, per_node, master, etc. Some of them we * can deduce from the contents. For example, di_size will be a multiple * of 96 for the rindex. di_size will be 8 for inum, 24 for statfs, etc. * the per_node directory will have a ".." entry that will lead us to * the master dinode if it's been destroyed. */ static void peruse_system_dinode(struct gfs2_sbd *sdp, struct gfs2_dinode *di, struct gfs2_buffer_head *bh) { struct gfs2_inode *ip, *child_ip; struct gfs2_inum inum; int error; if (di->di_num.no_formal_ino == 2) { if (sdp->sd_sb.sb_master_dir.no_addr) return; log_warn(_("Found system master directory at: 0x%llx.\n"), di->di_num.no_addr); sdp->sd_sb.sb_master_dir.no_addr = di->di_num.no_addr; return; } ip = lgfs2_inode_read(sdp, di->di_num.no_addr); if (ip == NULL) { log_crit(_("Error reading inode: %s\n"), strerror(errno)); return; } if ((!sdp->gfs1 && di->di_num.no_formal_ino == 3) || (sdp->gfs1 && (di->di_flags & GFS2_DIF_JDATA) && (di->di_size % sizeof(struct gfs_jindex) == 0))) { if (fix_md.jiinode || is_journal_copy(ip, bh)) goto out_discard_ip; log_warn(_("Found system jindex file at: 0x%llx\n"), di->di_num.no_addr); fix_md.jiinode = ip; } else if (!sdp->gfs1 && is_dir(di, sdp->gfs1)) { /* Check for a jindex dir entry. 
Only one system dir has a jindex: master */ gfs2_lookupi(ip, "jindex", 6, &child_ip); if (child_ip) { if (fix_md.jiinode || is_journal_copy(ip, bh)) { inode_put(&child_ip); goto out_discard_ip; } fix_md.jiinode = child_ip; sdp->sd_sb.sb_master_dir.no_addr = di->di_num.no_addr; log_warn(_("Found system master directory at: " "0x%llx\n"), di->di_num.no_addr); return; } /* Check for a statfs_change0 dir entry. Only one system dir has a statfs_change: per_node, and its .. will be master. */ gfs2_lookupi(ip, "statfs_change0", 14, &child_ip); if (child_ip) { inode_put(&child_ip); if (fix_md.pinode || is_journal_copy(ip, bh)) goto out_discard_ip; log_warn(_("Found system per_node directory at: " "0x%llx\n"), ip->i_di.di_num.no_addr); fix_md.pinode = ip; error = dir_search(ip, "..", 2, NULL, &inum); if (!error && inum.no_addr) { sdp->sd_sb.sb_master_dir.no_addr = inum.no_addr; log_warn(_("From per_node\'s \'..\' I " "backtracked the master directory " "to: 0x%llx\n"), inum.no_addr); } return; } log_debug(_("Unknown system directory at block 0x%llx\n"), di->di_num.no_addr); goto out_discard_ip; } else if (!sdp->gfs1 && di->di_size == 8) { if (fix_md.inum || is_journal_copy(ip, bh)) goto out_discard_ip; fix_md.inum = ip; log_warn(_("Found system inum file at: 0x%llx\n"), di->di_num.no_addr); } else if (di->di_size == 24) { if (fix_md.statfs || is_journal_copy(ip, bh)) goto out_discard_ip; fix_md.statfs = ip; log_warn(_("Found system statfs file at: 0x%llx\n"), di->di_num.no_addr); } else if ((di->di_size % 96) == 0) { if (fix_md.riinode || is_journal_copy(ip, bh)) goto out_discard_ip; fix_md.riinode = ip; log_warn(_("Found system rindex file at: 0x%llx\n"), di->di_num.no_addr); } else if (!fix_md.qinode && di->di_size >= 176 && di->di_num.no_formal_ino >= 12 && di->di_num.no_formal_ino <= 100) { if (is_journal_copy(ip, bh)) goto out_discard_ip; fix_md.qinode = ip; log_warn(_("Found system quota file at: 0x%llx\n"), di->di_num.no_addr); } else { out_discard_ip: 
inode_put(&ip); } } /** * peruse_user_dinode - process a user dinode trying to find the root directory * */ static void peruse_user_dinode(struct gfs2_sbd *sdp, struct gfs2_dinode *di, struct gfs2_buffer_head *bh) { struct gfs2_inode *ip, *parent_ip; struct gfs2_inum inum; int error; if (sdp->sd_sb.sb_root_dir.no_addr) /* if we know the root dinode */ return; /* we don't need to find the root */ if (!is_dir(di, sdp->gfs1)) /* if this isn't a directory */ return; /* it can't lead us to the root anyway */ if (di->di_num.no_formal_ino == 1) { struct gfs2_buffer_head *root_bh; if (di->di_num.no_addr == bh->b_blocknr) { log_warn(_("Found the root directory at: 0x%llx.\n"), di->di_num.no_addr); sdp->sd_sb.sb_root_dir.no_addr = di->di_num.no_addr; return; } log_warn(_("The root dinode should be at block 0x%llx but it " "seems to be destroyed.\n"), (unsigned long long)di->di_num.no_addr); log_warn(_("Found a copy of the root directory in a journal " "at block: 0x%llx.\n"), (unsigned long long)bh->b_blocknr); if (!query(_("Do you want to replace the root dinode from the " "copy? 
(y/n)"))) { log_err(_("Damaged root dinode not fixed.\n")); return; } root_bh = bread(sdp, di->di_num.no_addr); memcpy(root_bh->b_data, bh->b_data, sdp->bsize); bmodified(root_bh); brelse(root_bh); log_warn(_("Root directory copied from the journal.\n")); return; } ip = lgfs2_inode_read(sdp, di->di_num.no_addr); if (ip == NULL) { log_crit(_("Error reading inode: %s\n"), strerror(errno)); return; } while (ip) { gfs2_lookupi(ip, "..", 2, &parent_ip); if (parent_ip && parent_ip->i_di.di_num.no_addr == ip->i_di.di_num.no_addr) { log_warn(_("Found the root directory at: 0x%llx\n"), ip->i_di.di_num.no_addr); sdp->sd_sb.sb_root_dir.no_addr = ip->i_di.di_num.no_addr; inode_put(&parent_ip); inode_put(&ip); return; } if (!parent_ip) break; inode_put(&ip); ip = parent_ip; } error = dir_search(ip, "..", 2, NULL, &inum); if (!error && inum.no_addr && inum.no_addr < possible_root) { possible_root = inum.no_addr; log_debug(_("Found a possible root at: 0x%llx\n"), (unsigned long long)possible_root); } inode_put(&ip); } /** * find_rgs_for_bsize - check a range of blocks for rgrps to determine bsize. * Assumes: device is open. */ static int find_rgs_for_bsize(struct gfs2_sbd *sdp, uint64_t startblock, uint32_t *known_bsize) { uint64_t blk, max_rg_size, rb_addr; struct gfs2_buffer_head *bh, *rb_bh; uint32_t bsize, bsize2; uint32_t chk; char *p; int found_rg; struct gfs2_meta_header mh; sdp->bsize = GFS2_DEFAULT_BSIZE; max_rg_size = 524288; /* Max RG size is 2GB. Max block size is 4K. 2G / 4K blks = 524288, So this is traversing 2GB in 4K block increments. 
*/ for (blk = startblock; blk < startblock + max_rg_size; blk++) { bh = bread(sdp, blk); found_rg = 0; for (bsize = 0; bsize < GFS2_DEFAULT_BSIZE; bsize += GFS2_BASIC_BLOCK) { p = bh->b_data + bsize; chk = ((struct gfs2_meta_header *)p)->mh_magic; if (be32_to_cpu(chk) != GFS2_MAGIC) continue; chk = ((struct gfs2_meta_header *)p)->mh_type; if (be32_to_cpu(chk) == GFS2_METATYPE_RG) { found_rg = 1; break; } } if (!found_rg) continue; /* Try all the block sizes in 512 byte multiples */ for (bsize2 = GFS2_BASIC_BLOCK; bsize2 <= GFS2_DEFAULT_BSIZE; bsize2 += GFS2_BASIC_BLOCK) { rb_addr = (bh->b_blocknr * (GFS2_DEFAULT_BSIZE / bsize2)) + (bsize / bsize2) + 1; sdp->bsize = bsize2; /* temporarily */ rb_bh = bread(sdp, rb_addr); gfs2_meta_header_in(&mh, rb_bh); brelse(rb_bh); if (mh.mh_magic == GFS2_MAGIC && mh.mh_type == GFS2_METATYPE_RB) { log_debug(_("boff:%d bsize2:%d rg:0x%llx, " "rb:0x%llx\n"), bsize, bsize2, (unsigned long long)blk, (unsigned long long)rb_addr); *known_bsize = bsize2; break; } } brelse(bh); if (!(*known_bsize)) { sdp->bsize = GFS2_DEFAULT_BSIZE; continue; } sdp->bsize = *known_bsize; log_warn(_("Block size determined to be: %d\n"), *known_bsize); return 0; } return 0; } /** * peruse_metadata - check a range of blocks for metadata * Assumes: device is open. */ static int peruse_metadata(struct gfs2_sbd *sdp, uint64_t startblock) { uint64_t blk, max_rg_size; struct gfs2_buffer_head *bh; struct gfs2_dinode di; max_rg_size = 2147483648ull / sdp->bsize; /* Max RG size is 2GB. 2G / bsize. */ for (blk = startblock; blk < startblock + max_rg_size; blk++) { bh = bread(sdp, blk); if (gfs2_check_meta(bh, GFS2_METATYPE_DI)) { brelse(bh); continue; } gfs2_dinode_in(&di, bh); if (di.di_flags & GFS2_DIF_SYSTEM) peruse_system_dinode(sdp, &di, bh); else peruse_user_dinode(sdp, &di, bh); brelse(bh); } return 0; } /** * sb_repair - repair a damaged superblock * Assumes: device is open. 
* The biggest RG size is 2GB */ static int sb_repair(struct gfs2_sbd *sdp) { uint64_t half; uint32_t known_bsize = 0; unsigned char uuid[16]; int error = 0; memset(&fix_md, 0, sizeof(fix_md)); /* Step 1 - First we need to determine the correct block size. */ sdp->bsize = GFS2_DEFAULT_BSIZE; log_warn(_("Gathering information to repair the gfs2 superblock. " "This may take some time.\n")); error = find_rgs_for_bsize(sdp, (GFS2_SB_ADDR * GFS2_BASIC_BLOCK) / GFS2_DEFAULT_BSIZE, &known_bsize); if (error) return error; if (!known_bsize) { log_warn(_("Block size not apparent; checking elsewhere.\n")); /* First, figure out the device size. We need that so we can find a suitable start point to determine what's what. */ half = sdp->dinfo.size / 2; /* in bytes */ half /= sdp->bsize; /* Start looking halfway through the device for gfs2 structures. If there aren't any at all, forget it. */ error = find_rgs_for_bsize(sdp, half, &known_bsize); if (error) return error; } if (!known_bsize) { log_err(_("Unable to determine the block size; this " "does not look like a gfs2 file system.\n")); return -1; } /* Step 2 - look for the sytem dinodes */ error = peruse_metadata(sdp, (GFS2_SB_ADDR * GFS2_BASIC_BLOCK) / GFS2_DEFAULT_BSIZE); if (error) return error; if (!sdp->sd_sb.sb_master_dir.no_addr) { log_err(_("Unable to locate the system master directory.\n")); return -1; } if (!sdp->sd_sb.sb_root_dir.no_addr) { struct gfs2_inum inum; log_err(_("Unable to locate the root directory.\n")); if (possible_root == HIGHEST_BLOCK) { /* Take advantage of the fact that mkfs.gfs2 creates master immediately after root. 
*/ log_err(_("Can't find any dinodes that might " "be the root; using master - 1.\n")); possible_root = sdp->sd_sb.sb_master_dir.no_addr - 1; } log_err(_("Found a possible root at: 0x%llx\n"), (unsigned long long)possible_root); sdp->sd_sb.sb_root_dir.no_addr = possible_root; sdp->md.rooti = lgfs2_inode_read(sdp, possible_root); if (!sdp->md.rooti || sdp->md.rooti->i_di.di_header.mh_magic != GFS2_MAGIC) { struct gfs2_buffer_head *bh; log_err(_("The root dinode block is destroyed.\n")); log_err(_("At this point I recommend " "reinitializing it.\n" "Hopefully everything will later " "be put into lost+found.\n")); if (!query(_("Okay to reinitialize the root " "dinode? (y/n)"))) { log_err(_("The root dinode was not " "reinitialized; aborting.\n")); return -1; } inum.no_formal_ino = 1; inum.no_addr = possible_root; bh = init_dinode(sdp, &inum, S_IFDIR | 0755, 0, &inum); brelse(bh); } } /* Step 3 - Rebuild the lock protocol and file system table name */ strcpy(sdp->lockproto, GFS2_DEFAULT_LOCKPROTO); strcpy(sdp->locktable, "unknown"); if (query(_("Okay to fix the GFS2 superblock? 
(y/n)"))) { log_info(_("Found system master directory at: 0x%llx\n"), sdp->sd_sb.sb_master_dir.no_addr); sdp->master_dir = lgfs2_inode_read(sdp, sdp->sd_sb.sb_master_dir.no_addr); if (sdp->master_dir == NULL) { log_crit(_("Error reading master inode: %s\n"), strerror(errno)); return -1; } sdp->master_dir->i_di.di_num.no_addr = sdp->sd_sb.sb_master_dir.no_addr; log_info(_("Found the root directory at: 0x%llx\n"), sdp->sd_sb.sb_root_dir.no_addr); sdp->md.rooti = lgfs2_inode_read(sdp, sdp->sd_sb.sb_root_dir.no_addr); if (sdp->md.rooti == NULL) { log_crit(_("Error reading root inode: %s\n"), strerror(errno)); return -1; } get_random_bytes(uuid, sizeof(uuid)); build_sb(sdp, uuid); inode_put(&sdp->md.rooti); inode_put(&sdp->master_dir); sb_fixed = 1; } else { log_crit(_("GFS2 superblock not fixed; fsck cannot proceed " "without a valid superblock.\n")); return -1; } return 0; } /** * fill_super_block * @sdp: * * Returns: 0 on success, -1 on failure */ static int fill_super_block(struct gfs2_sbd *sdp) { int ret; sync(); /******************************************************************** ***************** First, initialize all lists ********************** ********************************************************************/ log_info( _("Initializing lists...\n")); sdp->rgtree.osi_node = NULL; /******************************************************************** ************ next, read in on-disk SB and set constants ********** ********************************************************************/ sdp->sd_sb.sb_bsize = GFS2_DEFAULT_BSIZE; sdp->bsize = sdp->sd_sb.sb_bsize; if (sizeof(struct gfs2_sb) > sdp->sd_sb.sb_bsize){ log_crit( _("GFS superblock is larger than the blocksize!\n")); log_debug("sizeof(struct gfs2_sb) > sdp->sd_sb.sb_bsize\n"); return -1; } if (compute_constants(sdp)) { log_crit(_("Bad constants (1)\n")); exit(FSCK_ERROR); } ret = read_sb(sdp); if (ret < 0) { if (sb_repair(sdp) != 0) return -1; /* unrepairable, so exit */ /* Now that we've tried to repair 
it, re-read it. */ ret = read_sb(sdp); if (ret < 0) return -1; } if (sdp->gfs1) sbd1 = (struct gfs_sb *)&sdp->sd_sb; return 0; } static void gfs_log_header_out(struct gfs_log_header *head, char *buf) { struct gfs_log_header *str = (struct gfs_log_header *) buf; str->lh_header.mh_magic = cpu_to_be32(head->lh_header.mh_magic); str->lh_header.mh_type = cpu_to_be32(head->lh_header.mh_type); str->lh_header.mh_format = cpu_to_be32(head->lh_header.mh_format); str->lh_header.__pad0 = cpu_to_be32(head->lh_header.__pad0); str->lh_flags = cpu_to_be32(head->lh_flags); str->lh_pad = cpu_to_be32(head->lh_pad); str->lh_first = cpu_to_be64(head->lh_first); str->lh_sequence = cpu_to_be64(head->lh_sequence); str->lh_tail = cpu_to_be64(head->lh_tail); str->lh_last_dump = cpu_to_be64(head->lh_last_dump); } /* * reconstruct_single_journal - write a fresh GFS1 journal * @sdp: superblock * @jnum: journal number * * This function will write a fresh journal over the top of * the previous journal. All journal information is lost. This * process is basically stolen from write_journals() in the mkfs code. 
* * Returns: -1 on error, 0 otherwise */ static int reconstruct_single_journal(struct gfs2_sbd *sdp, int jnum, uint32_t ji_nsegment) { struct gfs_log_header lh; uint32_t seg, sequence; struct gfs2_buffer_head *bh; srandom(time(NULL)); sequence = ji_nsegment / (RAND_MAX + 1.0) * random(); log_info("Clearing journal %d\n", jnum); for (seg = 0; seg < ji_nsegment; seg++){ memset(&lh, 0, sizeof(struct gfs_log_header)); lh.lh_header.mh_magic = GFS2_MAGIC; lh.lh_header.mh_type = GFS2_METATYPE_LH; lh.lh_header.mh_format = GFS2_FORMAT_LH; lh.lh_header.__pad0 = 0x101674; /* mh_generation */ lh.lh_flags = GFS2_LOG_HEAD_UNMOUNT; lh.lh_first = sdp->md.journal[jnum]->i_di.di_num.no_addr + (seg * sbd1->sb_seg_size); lh.lh_sequence = sequence; bh = bget(sdp, lh.lh_first * sdp->bsize); memset(bh->b_data, 0, sdp->bsize); gfs_log_header_out(&lh, bh->b_data); gfs_log_header_out(&lh, bh->b_data + GFS2_BASIC_BLOCK - sizeof(struct gfs_log_header)); brelse(bh); if (++sequence == ji_nsegment) sequence = 0; } return 0; } /* * reconstruct_journals - write fresh journals for GFS1 only * sdp: the super block * * Returns: 0 on success, -1 on failure */ static int reconstruct_journals(struct gfs2_sbd *sdp) { int i, count; struct gfs_jindex ji; char buf[sizeof(struct gfs_jindex)]; log_err(_("Clearing GFS journals (this may take a while)\n")); for (i = 0; i < sdp->md.journals; i++) { count = gfs2_readi(sdp->md.jiinode, buf, i * sizeof(struct gfs_jindex), sizeof(struct gfs_jindex)); if (count != sizeof(struct gfs_jindex)) return 0; gfs_jindex_in(&ji, buf); if ((i % 2) == 0) log_err("."); if (reconstruct_single_journal(sdp, i, ji.ji_nsegment)) return -1; } log_err(_("\nJournals cleared.\n")); return 0; } /** * init_rindex - read in the rindex file */ static int init_rindex(struct gfs2_sbd *sdp) { int err; if (sdp->gfs1) sdp->md.riinode = lgfs2_inode_read(sdp, sbd1->sb_rindex_di.no_addr); else gfs2_lookupi(sdp->master_dir, "rindex", 6, &sdp->md.riinode); if (sdp->md.riinode) return 0; if (!query( 
_("The gfs2 system rindex inode is missing. " "Okay to rebuild it? (y/n) "))) { log_crit(_("Error: Cannot proceed without a valid rindex.\n")); return -1; } if ((err = build_rindex(sdp))) { log_crit(_("Error %d rebuilding rindex\n"), err); return -1; } return 0; } /** * init_jindex - read in the rindex file */ static int init_jindex(struct gfs2_sbd *sdp) { /******************************************************************* ****************** Fill in journal information ****************** *******************************************************************/ /* rgrepair requires the journals be read in in order to distinguish "real" rgrps from rgrps that are just copies left in journals. */ if (sdp->gfs1) sdp->md.jiinode = lgfs2_inode_read(sdp, sbd1->sb_jindex_di.no_addr); else gfs2_lookupi(sdp->master_dir, "jindex", 6, &sdp->md.jiinode); if (!sdp->md.jiinode) { int err; if (!query( _("The gfs2 system jindex inode is missing. " "Okay to rebuild it? (y/n) "))) { log_crit(_("Error: cannot proceed without a valid " "jindex file.\n")); return -1; } /* In order to rebuild jindex, we need some valid rgrps in memory. Temporarily read those in. 
*/ err = fetch_rgrps(sdp); if (err) return err; err = build_jindex(sdp); /* Free rgrps read in earlier (re-read them later) */ gfs2_rgrp_free(&sdp->rgtree); if (err) { log_crit(_("Error %d rebuilding jindex\n"), err); return err; } } /* read in the ji data */ if (ji_update(sdp)){ log_err( _("Unable to read in jindex inode.\n")); return -1; } return 0; } /** * initialize - initialize superblock pointer * */ int initialize(struct gfs2_sbd *sdp, int force_check, int preen, int *all_clean) { int clean_journals = 0, open_flag; *all_clean = 0; if (opts.no) open_flag = O_RDONLY; else open_flag = O_RDWR | O_EXCL; sdp->device_fd = open(opts.device, open_flag); if (sdp->device_fd < 0) { int is_mounted, ro; if (open_flag == O_RDONLY || errno != EBUSY) { log_crit( _("Unable to open device: %s\n"), opts.device); return FSCK_USAGE; } /* We can't open it EXCL. It may be already open rw (in which case we want to deny them access) or it may be mounted as the root file system at boot time (in which case we need to allow it.) We use is_pathname_mounted here even though we're specifying a device name, not a path name. The function checks for device as well. */ strncpy(sdp->device_name, opts.device, sizeof(sdp->device_name)); sdp->path_name = sdp->device_name; /* This gets overwritten */ is_mounted = is_pathname_mounted(sdp, &ro); /* If the device is busy, but not because it's mounted, fail. This protects against cases where the file system is LVM and perhaps mounted on a different node. */ if (!is_mounted) goto mount_fail; /* If the device is mounted, but not mounted RO, fail. This protects them against cases where the file system is mounted RW, but still allows us to check our own root file system. */ if (!ro) goto mount_fail; /* The device is mounted RO, so it's likely our own root file system. We can only do so much to protect the users from themselves. Try opening without O_EXCL. 
*/ if ((sdp->device_fd = open(opts.device, O_RDWR)) < 0) goto mount_fail; was_mounted_ro = 1; } if (lgfs2_get_dev_info(sdp->device_fd, &sdp->dinfo)) { perror(sdp->device_name); return FSCK_ERROR; } /* read in sb from disk */ if (fill_super_block(sdp)) return FSCK_ERROR; /* Change lock protocol to be fsck_* instead of lock_* */ if (!opts.no && preen_is_safe(sdp, preen, force_check)) { if (block_mounters(sdp, 1)) { log_err( _("Unable to block other mounters\n")); return FSCK_USAGE; } } /* Get master dinode */ if (sdp->gfs1) sdp->master_dir = NULL; else sdp->master_dir = lgfs2_inode_read(sdp, sdp->sd_sb.sb_master_dir.no_addr); if (!sdp->gfs1 && (sdp->master_dir->i_di.di_header.mh_magic != GFS2_MAGIC || sdp->master_dir->i_di.di_header.mh_type != GFS2_METATYPE_DI || !sdp->master_dir->i_di.di_size)) { inode_put(&sdp->master_dir); rebuild_master(sdp); sdp->master_dir = lgfs2_inode_read(sdp, sdp->sd_sb.sb_master_dir.no_addr); if (sdp->master_dir == NULL) { log_crit(_("Error reading master directory: %s\n"), strerror(errno)); return FSCK_ERROR; } } /* Look up the "per_node" inode. If there are journals missing, we need to figure out what's missing from per_node. And we need all our journals to be there before we can replay them. */ if (!sdp->gfs1) lookup_per_node(sdp, 0); /* We need rindex first in case jindex is missing and needs to read in the rgrps before rebuilding it. */ if (init_rindex(sdp)) return FSCK_ERROR; /* We need to read in jindex in order to replay the journals */ if (init_jindex(sdp)) return FSCK_ERROR; /* If GFS, rebuild the journals. If GFS2, replay them. We don't have the smarts to replay GFS1 journals (neither did gfs_fsck). 
*/ if (sdp->gfs1) { if (reconstruct_journals(sdp)) return FSCK_ERROR; } else if (replay_journals(sdp, preen, force_check, &clean_journals)) { if (!opts.no && preen_is_safe(sdp, preen, force_check)) block_mounters(sdp, 0); stack; return FSCK_ERROR; } if (sdp->md.journals == clean_journals) *all_clean = 1; else { if (force_check || !preen) log_notice( _("\nJournal recovery complete.\n")); } if (!force_check && *all_clean && preen) return FSCK_OK; if (init_system_inodes(sdp)) return FSCK_ERROR; return FSCK_OK; mount_fail: log_crit( _("Device %s is busy.\n"), opts.device); return FSCK_USAGE; } void destroy(struct gfs2_sbd *sdp) { if (!opts.no) { if (block_mounters(sdp, 0)) { log_warn( _("Unable to unblock other mounters - manual intervention required\n")); log_warn( _("Use 'gfs2_tool sb proto' to fix\n")); } log_info( _("Syncing the device.\n")); fsync(sdp->device_fd); } empty_super_block(sdp); close(sdp->device_fd); if (was_mounted_ro && errors_corrected) { sdp->device_fd = open("/proc/sys/vm/drop_caches", O_WRONLY); if (sdp->device_fd >= 0) { write(sdp->device_fd, "2", 1); close(sdp->device_fd); } else log_err( _("fsck.gfs2: Non-fatal error dropping " "caches.\n")); } } gfs2-utils/gfs2/fsck/inode_hash.c0000664000175000017500000000315212110647577015576 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include "libgfs2.h" #include "osi_list.h" #include "inode_hash.h" #include "fsck.h" #define _(String) gettext(String) struct inode_info *inodetree_find(uint64_t block) { struct osi_node *node = inodetree.osi_node; while (node) { struct inode_info *data = (struct inode_info *)node; if (block < data->di_num.no_addr) node = node->osi_left; else if (block > data->di_num.no_addr) node = node->osi_right; else return data; } return NULL; } struct inode_info *inodetree_insert(struct gfs2_inum di_num) { struct osi_node **newn = &inodetree.osi_node, *parent = NULL; struct inode_info *data; /* Figure out where to put new node */ while (*newn) { 
struct inode_info *cur = (struct inode_info *)*newn; parent = *newn; if (di_num.no_addr < cur->di_num.no_addr) newn = &((*newn)->osi_left); else if (di_num.no_addr > cur->di_num.no_addr) newn = &((*newn)->osi_right); else return cur; } data = malloc(sizeof(struct inode_info)); if (!data) { log_crit( _("Unable to allocate inode_info structure\n")); return NULL; } if (!memset(data, 0, sizeof(struct inode_info))) { log_crit( _("Error while zeroing inode_info structure\n")); return NULL; } /* Add new node and rebalance tree. */ data->di_num.no_addr = di_num.no_addr; data->di_num.no_formal_ino = di_num.no_formal_ino; osi_link_node(&data->node, parent, newn); osi_insert_color(&data->node, &inodetree); return data; } void inodetree_delete(struct inode_info *b) { osi_erase(&b->node, &inodetree); free(b); } gfs2-utils/gfs2/fsck/inode_hash.h0000664000175000017500000000042012110647577015576 0ustar andyandy#ifndef _INODE_HASH_H #define _INODE_HASH_H struct inode_info; extern struct inode_info *inodetree_find(uint64_t block); extern struct inode_info *inodetree_insert(struct gfs2_inum di_num); extern void inodetree_delete(struct inode_info *b); #endif /* _INODE_HASH_H */ gfs2-utils/gfs2/fsck/link.c0000664000175000017500000000465012110647577014436 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #define _(String) gettext(String) #include "libgfs2.h" #include "fsck.h" #include "inode_hash.h" #include "link.h" int set_di_nlink(struct gfs2_inode *ip) { struct inode_info *ii; /*log_debug( _("Setting link count to %u for %" PRIu64 " (0x%" PRIx64 ")\n"), count, inode_no, inode_no);*/ /* If the list has entries, look for one that matches inode_no */ ii = inodetree_find(ip->i_di.di_num.no_addr); if (!ii) ii = inodetree_insert(ip->i_di.di_num); if (ii) ii->di_nlink = ip->i_di.di_nlink; else return -1; return 0; } int incr_link_count(struct gfs2_inum no, struct gfs2_inode *ip, const char *why) { struct inode_info *ii = NULL; uint64_t 
referenced_from = ip ? ip->i_di.di_num.no_addr : 0; ii = inodetree_find(no.no_addr); /* If the list has entries, look for one that matches inode_no */ if (ii) { if (ii->di_num.no_formal_ino != no.no_formal_ino) return 1; ii->counted_links++; log_debug( _("Dir (0x%llx) incremented counted " "links to %u for (0x%llx) via %s\n"), (unsigned long long)referenced_from, ii->counted_links, (unsigned long long)no.no_addr, why); return 0; } log_debug( _("Ref: (0x%llx) No match found when incrementing " "link for (0x%llx)!\n"), (unsigned long long)referenced_from, (unsigned long long)no.no_addr); /* If no match was found, add a new entry and set its * counted links to 1 */ ii = inodetree_insert(no); if (ii) ii->counted_links = 1; else return -1; return 0; } int decr_link_count(uint64_t inode_no, uint64_t referenced_from, const char *why) { struct inode_info *ii = NULL; ii = inodetree_find(inode_no); /* If the list has entries, look for one that matches * inode_no */ if (ii) { if (!ii->counted_links) { log_debug( _("Dir (0x%llx)'s link to " "(0x%llx) via %s is zero!\n"), (unsigned long long)referenced_from, (unsigned long long)inode_no, why); return 0; } ii->counted_links--; log_debug( _("Dir (0x%llx) decremented counted " "links to %u for (0x%llx) via %s\n"), (unsigned long long)referenced_from, ii->counted_links, (unsigned long long)inode_no, why); return 0; } log_debug( _("No match found when decrementing link for (0x%llx)!\n"), (unsigned long long)inode_no); return -1; } gfs2-utils/gfs2/fsck/link.h0000664000175000017500000000042112110647577014433 0ustar andyandy#ifndef _LINK_H #define _LINK_H int set_di_nlink(struct gfs2_inode *ip); int incr_link_count(struct gfs2_inum no, struct gfs2_inode *ip, const char *why); int decr_link_count(uint64_t inode_no, uint64_t referenced_from, const char *why); #endif /* _LINK_H */ gfs2-utils/gfs2/fsck/lost_n_found.c0000664000175000017500000002017512171730231016155 0ustar andyandy#include "clusterautoconfig.h" #include #include #include 
#include #include #include #include #include #define _(String) gettext(String) #include "fsck.h" #include "libgfs2.h" #include "lost_n_found.h" #include "link.h" #include "metawalk.h" #include "util.h" static void add_dotdot(struct gfs2_inode *ip) { struct dir_info *di; struct gfs2_sbd *sdp = ip->i_sbd; int err; log_info( _("Adding .. entry to directory %llu (0x%llx) pointing back " "to lost+found\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); /* If there's a pre-existing .. directory entry, we have to back out the links. */ di = dirtree_find(ip->i_di.di_num.no_addr); if (di && valid_block(sdp, di->dotdot_parent.no_addr)) { struct gfs2_inode *dip; log_debug(_("Directory (0x%llx) already had a " "\"..\" link to (0x%llx).\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)di->dotdot_parent.no_addr); dip = fsck_load_inode(sdp, di->dotdot_parent.no_addr); if (dip->i_di.di_num.no_formal_ino == di->dotdot_parent.no_formal_ino) { decr_link_count(di->dotdot_parent.no_addr, ip->i_di.di_num.no_addr, _(".. unlinked, moving to lost+found")); if (dip->i_di.di_nlink > 0) { dip->i_di.di_nlink--; set_di_nlink(dip); /* keep inode tree in sync */ log_debug(_("Decrementing its links to %d\n"), dip->i_di.di_nlink); bmodified(dip->i_bh); } else if (!dip->i_di.di_nlink) { log_debug(_("Its link count is zero.\n")); } else { log_debug(_("Its link count is %d! 
Changing " "it to 0.\n"), dip->i_di.di_nlink); dip->i_di.di_nlink = 0; set_di_nlink(dip); /* keep inode tree in sync */ bmodified(dip->i_bh); } } else { log_debug(_("Directory (0x%llx)'s link to parent " "(0x%llx) had a formal inode discrepancy: " "was 0x%llx, expected 0x%llx\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)di->dotdot_parent.no_addr, di->dotdot_parent.no_formal_ino, dip->i_di.di_num.no_formal_ino); log_debug(_("The parent directory was not changed.\n")); } fsck_inode_put(&dip); di = NULL; } else { if (di) log_debug(_("Couldn't find a valid \"..\" entry " "for orphan directory (0x%llx): " "'..' = 0x%llx\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)di->dotdot_parent.no_addr); else log_debug(_("Couldn't find directory (0x%llx) " "in directory tree.\n"), (unsigned long long)ip->i_di.di_num.no_addr); } if (gfs2_dirent_del(ip, "..", 2)) log_warn( _("add_inode_to_lf: Unable to remove " "\"..\" directory entry.\n")); err = dir_add(ip, "..", 2, &(lf_dip->i_di.di_num), (sdp->gfs1 ? GFS_FILE_DIR : DT_DIR)); if (err) { log_crit(_("Error adding .. directory: %s\n"), strerror(errno)); exit(FSCK_ERROR); } } void make_sure_lf_exists(struct gfs2_inode *ip) { uint8_t q; struct dir_info *di; struct gfs2_sbd *sdp = ip->i_sbd; uint32_t mode; if (lf_dip) return; log_info( _("Locating/Creating lost+found directory\n")); /* if this is gfs1, we have to trick createi into using no_formal_ino = no_addr, so we set next_inum to the free block we're about to allocate. */ if (sdp->gfs1) sdp->md.next_inum = find_free_blk(sdp); mode = (sdp->gfs1 ? DT2IF(GFS_FILE_DIR) : S_IFDIR) | 0700; if (sdp->gfs1) lf_dip = gfs_createi(sdp->md.rooti, "lost+found", mode, 0); else lf_dip = createi(sdp->md.rooti, "lost+found", S_IFDIR | 0700, 0); if (lf_dip == NULL) { log_crit(_("Error creating lost+found: %s\n"), strerror(errno)); exit(FSCK_ERROR); } /* createi will have incremented the di_nlink link count for the root directory. 
We must set the nlink value in the hash table to keep them in sync so that pass4 can detect and fix any descrepancies. */ set_di_nlink(sdp->md.rooti); q = block_type(lf_dip->i_di.di_num.no_addr); if (q != gfs2_inode_dir) { lf_was_created = 1; /* This is a new lost+found directory, so set its block type and increment link counts for the directories */ /* FIXME: i'd feel better about this if fs_mkdir returned whether it created a new directory or just found an old one, and we used that instead of the block_type to run this */ fsck_blockmap_set(ip, lf_dip->i_di.di_num.no_addr, _("lost+found dinode"), gfs2_inode_dir); dirtree_insert(lf_dip->i_di.di_num); /* root inode links to lost+found */ incr_link_count(sdp->md.rooti->i_di.di_num, lf_dip, _("root")); /* lost+found link for '.' from itself */ incr_link_count(lf_dip->i_di.di_num, lf_dip, "\".\""); /* lost+found link for '..' back to root */ incr_link_count(lf_dip->i_di.di_num, sdp->md.rooti, "\"..\""); if (sdp->gfs1) lf_dip->i_di.__pad1 = GFS_FILE_DIR; } log_info( _("lost+found directory is dinode %lld (0x%llx)\n"), (unsigned long long)lf_dip->i_di.di_num.no_addr, (unsigned long long)lf_dip->i_di.di_num.no_addr); di = dirtree_find(lf_dip->i_di.di_num.no_addr); if (di) { log_info( _("Marking lost+found inode connected\n")); di->checked = 1; di = NULL; } } /* add_inode_to_lf - Add dir entry to lost+found for the inode * @ip: inode to add to lost + found * * This function adds an entry into the lost and found dir * for the given inode. The name of the entry will be * "lost_i_num.no_addr>". * * Returns: 0 on success, -1 on failure. 
*/ int add_inode_to_lf(struct gfs2_inode *ip){ char tmp_name[256]; __be32 inode_type; uint64_t lf_blocks; struct gfs2_sbd *sdp = ip->i_sbd; int err = 0; uint32_t mode; make_sure_lf_exists(ip); if (ip->i_di.di_num.no_addr == lf_dip->i_di.di_num.no_addr) { log_err( _("Trying to add lost+found to itself...skipping")); return 0; } lf_blocks = lf_dip->i_di.di_blocks; if (sdp->gfs1) mode = gfs_to_gfs2_mode(ip); else mode = ip->i_di.di_mode & S_IFMT; switch (mode) { case S_IFDIR: add_dotdot(ip); sprintf(tmp_name, "lost_dir_%llu", (unsigned long long)ip->i_di.di_num.no_addr); inode_type = (sdp->gfs1 ? GFS_FILE_DIR : DT_DIR); break; case S_IFREG: sprintf(tmp_name, "lost_file_%llu", (unsigned long long)ip->i_di.di_num.no_addr); inode_type = (sdp->gfs1 ? GFS_FILE_REG : DT_REG); break; case S_IFLNK: sprintf(tmp_name, "lost_link_%llu", (unsigned long long)ip->i_di.di_num.no_addr); inode_type = (sdp->gfs1 ? GFS_FILE_LNK : DT_LNK); break; case S_IFBLK: sprintf(tmp_name, "lost_blkdev_%llu", (unsigned long long)ip->i_di.di_num.no_addr); inode_type = (sdp->gfs1 ? GFS_FILE_BLK : DT_BLK); break; case S_IFCHR: sprintf(tmp_name, "lost_chrdev_%llu", (unsigned long long)ip->i_di.di_num.no_addr); inode_type = (sdp->gfs1 ? GFS_FILE_CHR : DT_CHR); break; case S_IFIFO: sprintf(tmp_name, "lost_fifo_%llu", (unsigned long long)ip->i_di.di_num.no_addr); inode_type = (sdp->gfs1 ? GFS_FILE_FIFO : DT_FIFO); break; case S_IFSOCK: sprintf(tmp_name, "lost_socket_%llu", (unsigned long long)ip->i_di.di_num.no_addr); inode_type = (sdp->gfs1 ? GFS_FILE_SOCK : DT_SOCK); break; default: sprintf(tmp_name, "lost_%llu", (unsigned long long)ip->i_di.di_num.no_addr); inode_type = (sdp->gfs1 ? 
GFS_FILE_REG : DT_REG); break; } err = dir_add(lf_dip, tmp_name, strlen(tmp_name), &(ip->i_di.di_num), inode_type); if (err) { log_crit(_("Error adding directory %s: %s\n"), tmp_name, strerror(errno)); exit(FSCK_ERROR); } /* If the lf directory had new blocks added we have to mark them properly in the bitmap so they're not freed. */ if (lf_dip->i_di.di_blocks != lf_blocks) reprocess_inode(lf_dip, "lost+found"); /* This inode is linked from lost+found */ incr_link_count(ip->i_di.di_num, lf_dip, _("from lost+found")); /* If it's a directory, lost+found is back-linked to it via .. */ if (mode == S_IFDIR) incr_link_count(lf_dip->i_di.di_num, ip, _("to lost+found")); log_notice( _("Added inode #%llu (0x%llx) to lost+found\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); gfs2_dinode_out(&lf_dip->i_di, lf_dip->i_bh); bwrite(lf_dip->i_bh); return 0; } gfs2-utils/gfs2/fsck/lost_n_found.h0000664000175000017500000000031312154127655016165 0ustar andyandy#ifndef __LOST_N_FOUND_H__ #define __LOST_N_FOUND_H__ #include "libgfs2.h" int add_inode_to_lf(struct gfs2_inode *ip); void make_sure_lf_exists(struct gfs2_inode *ip); #endif /* __LOST_N_FOUND_H__ */ gfs2-utils/gfs2/fsck/main.c0000664000175000017500000002004612171730233014407 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include #include #include #include #include #define _(String) gettext(String) #include "copyright.cf" #include "libgfs2.h" #include "fsck.h" #include "osi_list.h" #include "metawalk.h" #include "util.h" struct gfs2_options opts = {0}; struct gfs2_inode *lf_dip = NULL; /* Lost and found directory inode */ int lf_was_created = 0; struct gfs2_bmap *bl = NULL; uint64_t last_fs_block, last_reported_block = -1; int64_t last_reported_fblock = -1000000; int skip_this_pass = FALSE, fsck_abort = FALSE; int errors_found = 0, errors_corrected = 0; const char *pass = ""; uint64_t last_data_block; uint64_t first_data_block; 
int preen = 0, force_check = 0; struct osi_root dup_blocks = (struct osi_root) { NULL, }; struct osi_root dirtree = (struct osi_root) { NULL, }; struct osi_root inodetree = (struct osi_root) { NULL, }; int dups_found = 0, dups_found_first = 0; struct gfs_sb *sbd1 = NULL; int sb_fixed = 0; /* This function is for libgfs2's sake. */ void print_it(const char *label, const char *fmt, const char *fmt2, ...) { va_list args; va_start(args, fmt2); printf("%s: ", label); vprintf(fmt, args); va_end(args); } static void usage(char *name) { printf("Usage: %s [-afhnpqvVy] \n", basename(name)); } static void version(void) { printf( _("GFS2 fsck %s (built %s %s)\n"), VERSION, __DATE__, __TIME__); printf(REDHAT_COPYRIGHT "\n"); } static int read_cmdline(int argc, char **argv, struct gfs2_options *gopts) { int c; while ((c = getopt(argc, argv, "afhnpqvyV")) != -1) { switch(c) { case 'a': preen = 1; gopts->yes = 1; break; case 'f': force_check = 1; break; case 'h': usage(argv[0]); exit(FSCK_OK); break; case 'n': gopts->no = 1; break; case 'p': preen = 1; gopts->yes = 1; break; case 'q': decrease_verbosity(); break; case 'v': increase_verbosity(); break; case 'V': version(); exit(FSCK_OK); break; case 'y': gopts->yes = 1; break; case ':': case '?': fprintf(stderr, _("Please use '-h' for help.\n")); return FSCK_USAGE; default: fprintf(stderr, _("Invalid option %c\n"), c); return FSCK_USAGE; } } if (argc > optind) { gopts->device = (argv[optind]); if (!gopts->device) { fprintf(stderr, _("Please use '-h' for help.\n")); return FSCK_USAGE; } } else { fprintf(stderr, _("No device specified (Please use '-h' for help)\n")); return FSCK_USAGE; } return 0; } static void interrupt(int sig) { char response; char progress[PATH_MAX]; if (!last_reported_block || last_reported_block == last_fs_block) sprintf(progress, _("progress unknown.\n")); else sprintf(progress, _("processing block %llu out of %llu\n"), (unsigned long long)last_reported_block, (unsigned long long)last_fs_block); response = 
generic_interrupt("gfs2_fsck", pass, progress, _("Do you want to abort gfs2_fsck, skip " \ "the rest of this pass or continue " \ "(a/s/c)?"), "asc"); if (tolower(response) == 's') { skip_this_pass = TRUE; return; } else if (tolower(response) == 'a') { fsck_abort = TRUE; return; } } static int check_statfs(struct gfs2_sbd *sdp) { struct osi_node *n, *next = NULL; struct rgrp_tree *rgd; struct gfs2_rindex *ri; struct gfs2_statfs_change sc; char buf[sizeof(struct gfs2_statfs_change)]; int count; if (sdp->gfs1 && !sdp->md.statfs->i_di.di_size) { log_info("This GFS1 file system is not using fast_statfs.\n"); return 0; } /* Read the current statfs values */ count = gfs2_readi(sdp->md.statfs, buf, 0, sdp->md.statfs->i_di.di_size); if (count == sizeof(struct gfs2_statfs_change)) gfs2_statfs_change_in(&sc, buf); /* Calculate the real values from the rgrp information */ sdp->blks_total = 0; sdp->blks_alloced = 0; sdp->dinodes_alloced = 0; for (n = osi_first(&sdp->rgtree); n; n = next) { next = osi_next(n); rgd = (struct rgrp_tree *)n; ri = &rgd->ri; sdp->blks_total += ri->ri_data; sdp->blks_alloced += (ri->ri_data - rgd->rg.rg_free); sdp->dinodes_alloced += rgd->rg.rg_dinodes; } /* See if they match */ if (sc.sc_total == sdp->blks_total && sc.sc_free == (sdp->blks_total - sdp->blks_alloced) && sc.sc_dinodes == sdp->dinodes_alloced) { log_info( _("The statfs file is accurate.\n")); return 0; } log_err( _("The statfs file is wrong:\n\n")); log_err( _("Current statfs values:\n")); log_err( _("blocks: %lld (0x%llx)\n"), (unsigned long long)sc.sc_total, (unsigned long long)sc.sc_total); log_err( _("free: %lld (0x%llx)\n"), (unsigned long long)sc.sc_free, (unsigned long long)sc.sc_free); log_err( _("dinodes: %lld (0x%llx)\n\n"), (unsigned long long)sc.sc_dinodes, (unsigned long long)sc.sc_dinodes); log_err( _("Calculated statfs values:\n")); log_err( _("blocks: %lld (0x%llx)\n"), (unsigned long long)sdp->blks_total, (unsigned long long)sdp->blks_total); log_err( _("free: %lld 
(0x%llx)\n"), (unsigned long long)(sdp->blks_total - sdp->blks_alloced), (unsigned long long)(sdp->blks_total - sdp->blks_alloced)); log_err( _("dinodes: %lld (0x%llx)\n"), (unsigned long long)sdp->dinodes_alloced, (unsigned long long)sdp->dinodes_alloced); errors_found++; if (!query( _("Okay to fix the master statfs file? (y/n)"))) { log_err( _("The statfs file was not fixed.\n")); return 0; } do_init_statfs(sdp); log_err( _("The statfs file was fixed.\n")); errors_corrected++; return 0; } struct fsck_pass { const char *name; int (*f)(struct gfs2_sbd *sdp); }; static const struct fsck_pass passes[] = { { .name = "pass1", .f = pass1 }, { .name = "pass1b", .f = pass1b }, { .name = "pass1c", .f = pass1c }, { .name = "pass2", .f = pass2 }, { .name = "pass3", .f = pass3 }, { .name = "pass4", .f = pass4 }, { .name = "pass5", .f = pass5 }, { .name = "check_statfs", .f = check_statfs }, { .name = NULL, } }; static int fsck_pass(const struct fsck_pass *p, struct gfs2_sbd *sdp) { int ret; if (fsck_abort) return FSCK_CANCELED; pass = p->name; log_notice( _("Starting %s\n"), p->name); ret = p->f(sdp); if (ret) exit(ret); if (skip_this_pass || fsck_abort) { skip_this_pass = 0; log_notice( _("%s interrupted \n"), p->name); return FSCK_CANCELED; } log_notice( _("%s complete \n"), p->name); return 0; } int main(int argc, char **argv) { struct gfs2_sbd sb; struct gfs2_sbd *sdp = &sb; int j; int i; int error = 0; int all_clean = 0; struct sigaction act = { .sa_handler = interrupt, }; setlocale(LC_ALL, ""); textdomain("gfs2-utils"); memset(sdp, 0, sizeof(*sdp)); if ((error = read_cmdline(argc, argv, &opts))) exit(error); setbuf(stdout, NULL); log_notice( _("Initializing fsck\n")); if ((error = initialize(sdp, force_check, preen, &all_clean))) exit(error); if (!force_check && all_clean && preen) { log_err( _("%s: clean.\n"), opts.device); destroy(sdp); exit(FSCK_OK); } sigaction(SIGINT, &act, NULL); for (i = 0; passes[i].name; i++) error = fsck_pass(passes + i, sdp); /* Free up our 
system inodes */ if (!sdp->gfs1) inode_put(&sdp->md.inum); inode_put(&sdp->md.statfs); for (j = 0; j < sdp->md.journals; j++) inode_put(&sdp->md.journal[j]); free(sdp->md.journal); sdp->md.journal = NULL; inode_put(&sdp->md.jiinode); inode_put(&sdp->md.riinode); inode_put(&sdp->md.qinode); if (!sdp->gfs1) inode_put(&sdp->md.pinode); inode_put(&sdp->md.rooti); if (!sdp->gfs1) inode_put(&sdp->master_dir); if (lf_dip) inode_put(&lf_dip); if (!opts.no && errors_corrected) log_notice( _("Writing changes to disk\n")); fsync(sdp->device_fd); destroy(sdp); if (sb_fixed) log_warn(_("Superblock was reset. Use tunegfs2 to manually " "set lock table before mounting.\n")); log_notice( _("gfs2_fsck complete\n")); if (!error) { if (!errors_found) error = FSCK_OK; else if (errors_found == errors_corrected) error = FSCK_NONDESTRUCT; else error = FSCK_UNCORRECTED; } exit(error); } gfs2-utils/gfs2/fsck/metawalk.c0000664000175000017500000016450512154127655015312 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include #include #include #include #define _(String) gettext(String) #include "libgfs2.h" #include "osi_tree.h" #include "fsck.h" #include "util.h" #include "metawalk.h" #include "inode_hash.h" #define COMFORTABLE_BLKS 5242880 /* 20GB in 4K blocks */ /* There are two bitmaps: (1) The "blockmap" that fsck uses to keep track of what block type has been discovered, and (2) The rgrp bitmap. Function gfs2_blockmap_set is used to set the former and gfs2_set_bitmap is used to set the latter. The two must be kept in sync, otherwise you'll get bitmap mismatches. This function checks the status of the bitmap whenever the blockmap changes, and fixes it accordingly. 
*/ int check_n_fix_bitmap(struct gfs2_sbd *sdp, uint64_t blk, int error_on_dinode, enum gfs2_mark_block new_blockmap_state) { int old_bitmap_state, new_bitmap_state; struct rgrp_tree *rgd; rgd = gfs2_blk2rgrpd(sdp, blk); old_bitmap_state = lgfs2_get_bitmap(sdp, blk, rgd); if (old_bitmap_state < 0) { log_err( _("Block %llu (0x%llx) is not represented in the " "system bitmap; part of an rgrp or superblock.\n"), (unsigned long long)blk, (unsigned long long)blk); return -1; } new_bitmap_state = blockmap_to_bitmap(new_blockmap_state, sdp->gfs1); if (old_bitmap_state != new_bitmap_state) { const char *allocdesc[2][5] = { /* gfs2 descriptions */ {"free", "data", "unlinked", "inode", "reserved"}, /* gfs1 descriptions: */ {"free", "data", "free meta", "metadata", "reserved"}}; if (error_on_dinode && old_bitmap_state == GFS2_BLKST_DINODE && new_bitmap_state != GFS2_BLKST_FREE) { log_debug(_("Reference as '%s' to block %llu (0x%llx) " "which was marked as dinode. Needs " "further investigation.\n"), allocdesc[sdp->gfs1][new_bitmap_state], (unsigned long long)blk, (unsigned long long)blk); return 1; } /* Keep these messages as short as possible, or the output gets to be huge and unmanageable. */ log_err( _("Block %llu (0x%llx) was '%s', should be %s.\n"), (unsigned long long)blk, (unsigned long long)blk, allocdesc[sdp->gfs1][old_bitmap_state], allocdesc[sdp->gfs1][new_bitmap_state]); if (query( _("Fix the bitmap? (y/n)"))) { /* If the new bitmap state is free (and therefore the old state was not) we have to add to the free space in the rgrp. If the old bitmap state was free (and therefore it no longer is) we have to subtract to the free space. If the type changed from dinode to data or data to dinode, no change in free space. */ gfs2_set_bitmap(sdp, blk, new_bitmap_state); if (new_bitmap_state == GFS2_BLKST_FREE) { /* If we're freeing a dinode, get rid of the hash table entries for it. 
*/ if (old_bitmap_state == GFS2_BLKST_DINODE) { struct dir_info *dt; struct inode_info *ii; dt = dirtree_find(blk); if (dt) dirtree_delete(dt); ii = inodetree_find(blk); if (ii) inodetree_delete(ii); } rgd->rg.rg_free++; if (sdp->gfs1) gfs_rgrp_out((struct gfs_rgrp *) &rgd->rg, rgd->bh[0]); else gfs2_rgrp_out_bh(&rgd->rg, rgd->bh[0]); } else if (old_bitmap_state == GFS2_BLKST_FREE) { rgd->rg.rg_free--; if (sdp->gfs1) gfs_rgrp_out((struct gfs_rgrp *) &rgd->rg, rgd->bh[0]); else gfs2_rgrp_out_bh(&rgd->rg, rgd->bh[0]); } log_err( _("The bitmap was fixed.\n")); } else { log_err( _("The bitmap inconsistency was ignored.\n")); } } return 0; } /* * _fsck_blockmap_set - Mark a block in the 4-bit blockmap and the 2-bit * bitmap, and adjust free space accordingly. */ int _fsck_blockmap_set(struct gfs2_inode *ip, uint64_t bblock, const char *btype, enum gfs2_mark_block mark, int error_on_dinode, const char *caller, int fline) { int error; static int prev_ino_addr = 0; static enum gfs2_mark_block prev_mark = 0; static int prevcount = 0; if (print_level >= MSG_DEBUG) { if ((ip->i_di.di_num.no_addr == prev_ino_addr) && (mark == prev_mark)) { log_info("(0x%llx) ", (unsigned long long)bblock); prevcount++; if (prevcount > 10) { log_info("\n"); prevcount = 0; } /* I'm circumventing the log levels here on purpose to make the output easier to debug. 
*/ } else if (ip->i_di.di_num.no_addr == bblock) { if (prevcount) { log_info("\n"); prevcount = 0; } printf( _("(%s:%d) %s inode found at block " "(0x%llx): marking as '%s'\n"), caller, fline, btype, (unsigned long long)ip->i_di.di_num.no_addr, block_type_string(mark)); } else if (mark == gfs2_bad_block || mark == gfs2_meta_inval) { if (prevcount) { log_info("\n"); prevcount = 0; } printf( _("(%s:%d) inode (0x%llx) references %s block" " (0x%llx): marking as '%s'\n"), caller, fline, (unsigned long long)ip->i_di.di_num.no_addr, btype, (unsigned long long)bblock, block_type_string(mark)); } else { if (prevcount) { log_info("\n"); prevcount = 0; } printf( _("(%s:%d) inode (0x%llx) references %s block" " (0x%llx): marking as '%s'\n"), caller, fline, (unsigned long long)ip->i_di.di_num.no_addr, btype, (unsigned long long)bblock, block_type_string(mark)); } prev_ino_addr = ip->i_di.di_num.no_addr; prev_mark = mark; } /* First, check the rgrp bitmap against what we think it should be. If that fails, it's an invalid block--part of an rgrp. 
*/ error = check_n_fix_bitmap(ip->i_sbd, bblock, error_on_dinode, mark); if (error) { if (error < 0) log_err( _("This block is not represented in the " "bitmap.\n")); return error; } error = gfs2_blockmap_set(bl, bblock, mark); return error; } struct duptree *dupfind(uint64_t block) { struct osi_node *node = dup_blocks.osi_node; while (node) { struct duptree *dt = (struct duptree *)node; if (block < dt->block) node = node->osi_left; else if (block > dt->block) node = node->osi_right; else return dt; } return NULL; } struct gfs2_inode *fsck_system_inode(struct gfs2_sbd *sdp, uint64_t block) { int j; if (lf_dip && lf_dip->i_di.di_num.no_addr == block) return lf_dip; if (!sdp->gfs1) return is_system_inode(sdp, block); if (sdp->md.statfs && block == sdp->md.statfs->i_di.di_num.no_addr) return sdp->md.statfs; if (sdp->md.jiinode && block == sdp->md.jiinode->i_di.di_num.no_addr) return sdp->md.jiinode; if (sdp->md.riinode && block == sdp->md.riinode->i_di.di_num.no_addr) return sdp->md.riinode; if (sdp->md.qinode && block == sdp->md.qinode->i_di.di_num.no_addr) return sdp->md.qinode; if (sdp->md.rooti && block == sdp->md.rooti->i_di.di_num.no_addr) return sdp->md.rooti; for (j = 0; j < sdp->md.journals; j++) if (sdp->md.journal && sdp->md.journal[j] && block == sdp->md.journal[j]->i_di.di_num.no_addr) return sdp->md.journal[j]; return NULL; } /* fsck_load_inode - same as gfs2_load_inode() in libgfs2 but system inodes get special treatment. */ struct gfs2_inode *fsck_load_inode(struct gfs2_sbd *sdp, uint64_t block) { struct gfs2_inode *ip = NULL; ip = fsck_system_inode(sdp, block); if (ip) return ip; if (sdp->gfs1) return lgfs2_gfs_inode_read(sdp, block); return lgfs2_inode_read(sdp, block); } /* fsck_inode_get - same as inode_get() in libgfs2 but system inodes get special treatment. 
*/ struct gfs2_inode *fsck_inode_get(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh) { struct gfs2_inode *sysip; sysip = fsck_system_inode(sdp, bh->b_blocknr); if (sysip) return sysip; if (sdp->gfs1) return lgfs2_gfs_inode_get(sdp, bh); return lgfs2_inode_get(sdp, bh); } /* fsck_inode_put - same as inode_put() in libgfs2 but system inodes get special treatment. */ void fsck_inode_put(struct gfs2_inode **ip_in) { struct gfs2_inode *ip = *ip_in; struct gfs2_inode *sysip; sysip = fsck_system_inode(ip->i_sbd, ip->i_di.di_num.no_addr); if (!sysip) inode_put(ip_in); } /** * dirent_repair - attempt to repair a corrupt directory entry. * @bh - The buffer header that contains the bad dirent * @de - The directory entry in native format * @dent - The directory entry in on-disk format * @type - Type of directory (DIR_LINEAR or DIR_EXHASH) * @first - TRUE if this is the first dirent in the buffer * * This function tries to repair a corrupt directory entry. All we * know at this point is that the length field is wrong. */ static int dirent_repair(struct gfs2_inode *ip, struct gfs2_buffer_head *bh, struct gfs2_dirent *de, struct gfs2_dirent *dent, int type, int first) { char *bh_end, *p; int calc_de_name_len = 0; /* If this is a sentinel, just fix the length and move on */ if (first && !de->de_inum.no_formal_ino) { /* Is it a sentinel? 
*/ if (type == DIR_LINEAR) de->de_rec_len = ip->i_sbd->bsize - sizeof(struct gfs2_dinode); else de->de_rec_len = ip->i_sbd->bsize - sizeof(struct gfs2_leaf); } else { bh_end = bh->b_data + ip->i_sbd->bsize; /* first, figure out a probable name length */ p = (char *)dent + sizeof(struct gfs2_dirent); while (*p && /* while there's a non-zero char and */ isprint(*p) && /* a printable character and */ p < bh_end) { /* not past end of buffer */ calc_de_name_len++; p++; } if (!calc_de_name_len) return 1; /* There can often be noise at the end, so only */ /* Trust the shorter of the two in case we have too much */ /* Or rather, only trust ours if it's shorter. */ if (!de->de_name_len || de->de_name_len > NAME_MAX || calc_de_name_len < de->de_name_len) /* if dent is hosed */ de->de_name_len = calc_de_name_len; /* use ours */ de->de_rec_len = GFS2_DIRENT_SIZE(de->de_name_len); } gfs2_dirent_out(de, (char *)dent); bmodified(bh); return 0; } /** * dirblk_truncate - truncate a directory block */ static void dirblk_truncate(struct gfs2_inode *ip, struct gfs2_dirent *fixb, struct gfs2_buffer_head *bh) { char *bh_end; struct gfs2_dirent de; bh_end = bh->b_data + ip->i_sbd->sd_sb.sb_bsize; /* truncate the block to save the most dentries. To do this we have to patch the previous dent. 
*/
	gfs2_dirent_in(&de, (char *)fixb);
	/* Stretch this entry so that it runs to the end of the block */
	de.de_rec_len = bh_end - (char *)fixb;
	gfs2_dirent_out(&de, (char *)fixb);
	bmodified(bh);
}

/*
 * check_entries - check directory entries for a given block
 *
 * @ip - dinode associated with this leaf block
 * @bh - buffer for the leaf block
 * @type - type of block this is (DIR_LINEAR or DIR_EXHASH)
 * @count - running count of entries; used in messages here and handed to
 *          pass->check_dentry (presumably the callback is what updates it)
 * @lindex - hash table index of this leaf, passed through to
 *           pass->check_dentry
 * @pass - structure pointing to pass-specific functions
 *
 * Walks every dirent in the block.  Entries whose record length is too
 * small, or which have an inode number but no name, are offered for repair;
 * entries that cannot be repaired are cut off by truncating the block.
 *
 * returns: 0 - good block, or it was repaired/truncated, or the walk was
 *              stopped after an unrepaired entry
 *          FSCK_OK - the pass is being skipped or fsck is aborting
 *          -1 - invalid directory type, or check_dentry reported an error
 */
static int check_entries(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
			 int type, uint32_t *count, int lindex,
			 struct metawalk_fxns *pass)
{
	struct gfs2_dirent *dent;
	struct gfs2_dirent de, *prev;
	int error = 0;
	char *bh_end;
	char *filename;
	int first = 1;

	bh_end = bh->b_data + ip->i_sbd->bsize;

	/* The first dirent follows the dinode or the leaf header */
	if (type == DIR_LINEAR) {
		dent = (struct gfs2_dirent *)(bh->b_data + sizeof(struct gfs2_dinode));
	} else if (type == DIR_EXHASH) {
		dent = (struct gfs2_dirent *)(bh->b_data + sizeof(struct gfs2_leaf));
		log_debug( _("Checking leaf %llu (0x%llx)\n"),
			  (unsigned long long)bh->b_blocknr,
			  (unsigned long long)bh->b_blocknr);
	} else {
		log_err( _("Invalid directory type %d specified\n"), type);
		return -1;
	}

	prev = NULL;
	if (!pass->check_dentry)
		return 0;

	while (1) {
		if (skip_this_pass || fsck_abort)
			return FSCK_OK;
		memset(&de, 0, sizeof(struct gfs2_dirent));
		gfs2_dirent_in(&de, (char *)dent);
		filename = (char *)dent + sizeof(struct gfs2_dirent);

		/* Corrupt if the record cannot hold its own header and name,
		   or if a non-first entry has an inode number but no name */
		if (de.de_rec_len < sizeof(struct gfs2_dirent) +
		    de.de_name_len ||
		    (de.de_inum.no_formal_ino && !de.de_name_len && !first)) {
			log_err( _("Directory block %llu (0x%llx"
				"), entry %d of directory %llu "
				"(0x%llx) is corrupt.\n"),
				(unsigned long long)bh->b_blocknr,
				(unsigned long long)bh->b_blocknr,
				(*count) + 1,
				(unsigned long long)ip->i_di.di_num.no_addr,
				(unsigned long long)ip->i_di.di_num.no_addr);
			if (query( _("Attempt to repair it? (y/n) "))) {
				if (dirent_repair(ip, bh, &de, dent, type,
						  first)) {
					if (first) /* make a new sentinel */
						dirblk_truncate(ip, dent, bh);
					else
						/* NOTE(review): prev looks like
						   it can still be NULL here if
						   it was never set - confirm */
						dirblk_truncate(ip, prev, bh);
					log_err( _("Unable to repair corrupt "
						   "directory entry; the "
						   "entry was removed "
						   "instead.\n"));
					return 0;
				} else {
					log_err( _("Corrupt directory entry "
						   "repaired.\n"));
					/* keep looping through dentries */
				}
			} else {
				log_err( _("Corrupt directory entry ignored, "
					"stopped after checking %d entries.\n"),
					*count);
				return 0;
			}
		}
		if (!de.de_inum.no_formal_ino){
			if (first){
				log_debug( _("First dirent is a sentinel (place holder).\n"));
				first = 0;
			} else {
				log_err( _("Directory entry with inode number of "
					"zero in leaf %llu (0x%llx) of "
					"directory %llu (0x%llx)!\n"),
					(unsigned long long)bh->b_blocknr,
					(unsigned long long)bh->b_blocknr,
					(unsigned long long)ip->i_di.di_num.no_addr,
					(unsigned long long)ip->i_di.di_num.no_addr);
				if (query(_("Attempt to remove it? (y/n) "))) {
					dirblk_truncate(ip, prev, bh);
					log_err(_("The corrupt directory "
						  "entry was removed.\n"));
				} else {
					log_err( _("Corrupt directory entry "
						   "ignored, stopped after "
						   "checking %d entries.\n"),
						 *count);
				}
				/* Either way the walk of this block ends here */
				return 0;
			}
		} else {
			if (!de.de_inum.no_addr && first) { /* reverse sentinel */
				log_debug( _("First dirent is a Sentinel (place holder).\n"));
				/* Swap the two to silently make it a proper
				   sentinel */
				de.de_inum.no_addr = de.de_inum.no_formal_ino;
				de.de_inum.no_formal_ino = 0;
				gfs2_dirent_out(&de, (char *)dent);
				bmodified(bh);
				/* Mark dirent buffer as modified */
				first = 0;
			} else {
				error = pass->check_dentry(ip, dent, prev, bh,
							   filename, count,
							   lindex,
							   pass->private);
				if (error < 0) {
					stack;
					return -1;
				}
			}
		}

		/* Stop once this record reaches the end of the block */
		if ((char *)dent + de.de_rec_len >= bh_end){
			log_debug( _("Last entry processed for %lld->%lld "
				     "(0x%llx->0x%llx), di_blocks=%llu.\n"),
				  (unsigned long long)ip->i_di.di_num.no_addr,
				  (unsigned long long)bh->b_blocknr,
				  (unsigned long long)ip->i_di.di_num.no_addr,
				  (unsigned long long)bh->b_blocknr,
				  (unsigned long long)ip->i_di.di_blocks);
			break;
		}

		/* If we didn't clear the dentry, or if we did, but it
		 * was the first dentry, set prev  */
		if (!error || first)
			prev = dent;
		first = 0;
		dent = (struct gfs2_dirent *)((char *)dent + de.de_rec_len);
	}
	return 0;
}

/**
 * check_leaf - check a leaf block for errors
 * @ip: the directory inode
 * @lindex: index in the hash table that points at this leaf
 * @pass: pass-specific callbacks
 * @leaf_no: in: block number of the leaf; also handed to pass->repair_leaf
 *           by address, so the callback can presumably change it
 * @leaf: filled in from the leaf block once it has been read
 * @ref_count: number of hash table pointers to this leaf
 *
 * Reads in the leaf block, validates it and runs the pass callbacks and
 * check_entries() on it.  The buffer is released again on every path out
 * of this function.
 *
 * Returns: 0 after the leaf was processed, -EEXIST if pass->check_leaf says
 *          the leaf was already checked, otherwise the bad-leaf result
 */
int check_leaf(struct gfs2_inode *ip, int lindex, struct metawalk_fxns *pass,
	       uint64_t *leaf_no, struct gfs2_leaf *leaf, int *ref_count)
{
	int error = 0, fix;
	struct gfs2_buffer_head *lbh = NULL;
	uint32_t count = 0;
	struct gfs2_sbd *sdp = ip->i_sbd;
	const char *msg;

	/* Make sure the block number is in range. */
	if (!valid_block(ip->i_sbd, *leaf_no)) {
		log_err( _("Leaf block #%llu (0x%llx) is out of range for "
			   "directory #%llu (0x%llx) at index %d (0x%x).\n"),
			 (unsigned long long)*leaf_no,
			 (unsigned long long)*leaf_no,
			 (unsigned long long)ip->i_di.di_num.no_addr,
			 (unsigned long long)ip->i_di.di_num.no_addr,
			 lindex, lindex);
		msg = _("that is out of range");
		goto bad_leaf;
	}

	/* Try to read in the leaf block. */
	lbh = bread(sdp, *leaf_no);
	/* Make sure it's really a valid leaf block. */
	if (gfs2_check_meta(lbh, GFS2_METATYPE_LF)) {
		msg = _("that is not really a leaf");
		goto bad_leaf;
	}
	/* NOTE(review): this result is overwritten below whenever
	   pass->check_leaf is set */
	if (pass->check_leaf_depth)
		error = pass->check_leaf_depth(ip, *leaf_no, *ref_count, lbh);

	if (pass->check_leaf) {
		error = pass->check_leaf(ip, *leaf_no, pass->private);
		if (error == -EEXIST) {
			log_info(_("Previous reference to leaf %lld (0x%llx) "
				   "has already checked it; skipping.\n"),
				 (unsigned long long)*leaf_no,
				 (unsigned long long)*leaf_no);
			brelse(lbh);
			return error;
		}
	}
	/* Early versions of GFS2 had an endianess bug in the kernel that set
	   lf_dirent_format to cpu_to_be16(GFS2_FORMAT_DE).  This was fixed
	   to use cpu_to_be32(), but we should check for incorrect values and
	   replace them with the correct value. */

	gfs2_leaf_in(leaf, lbh);
	if (leaf->lf_dirent_format == (GFS2_FORMAT_DE << 16)) {
		log_debug( _("incorrect lf_dirent_format at leaf #%" PRIu64
			     "\n"), *leaf_no);
		leaf->lf_dirent_format = GFS2_FORMAT_DE;
		gfs2_leaf_out(leaf, lbh);
		log_debug( _("Fixing lf_dirent_format.\n"));
	}

	/* Make sure it's really a leaf. */
	if (leaf->lf_header.mh_type != GFS2_METATYPE_LF) {
		log_err( _("Inode %llu (0x%llx) points to bad leaf %llu"
			   " (0x%llx).\n"),
			 (unsigned long long)ip->i_di.di_num.no_addr,
			 (unsigned long long)ip->i_di.di_num.no_addr,
			 (unsigned long long)*leaf_no,
			 (unsigned long long)*leaf_no);
		msg = _("that is not a leaf");
		goto bad_leaf;
	}

	if (pass->check_dentry && is_dir(&ip->i_di, sdp->gfs1)) {
		error = check_entries(ip, lbh, DIR_EXHASH, &count, lindex,
				      pass);

		if (skip_this_pass || fsck_abort)
			goto out;

		if (error < 0) {
			stack;
			goto out; /* This seems wrong: needs investigation */
		}

		if (count == leaf->lf_entries)
			goto out;

		/* release and re-read the leaf in case check_entries
		   changed it. */
		brelse(lbh);
		lbh = bread(sdp, *leaf_no);
		gfs2_leaf_in(leaf, lbh);
		if (count != leaf->lf_entries) {
			log_err( _("Leaf %llu (0x%llx) entry count in "
				   "directory %llu (0x%llx) does not match "
				   "number of entries found - is %u, found %u\n"),
				 (unsigned long long)*leaf_no,
				 (unsigned long long)*leaf_no,
				 (unsigned long long)ip->i_di.di_num.no_addr,
				 (unsigned long long)ip->i_di.di_num.no_addr,
				 leaf->lf_entries, count);
			if (query( _("Update leaf entry count? (y/n) "))) {
				leaf->lf_entries = count;
				gfs2_leaf_out(leaf, lbh);
				log_warn( _("Leaf entry count updated\n"));
			} else
				log_err( _("Leaf entry count left in "
					   "inconsistant state\n"));
		}
	}
out:
	/* Note that errors from the checks above are not propagated here */
	brelse(lbh);
	return 0;

bad_leaf:
	if (lbh)
		brelse(lbh);
	if (pass->repair_leaf) {
		/* The leaf we read in is bad so we need to repair it.
*/ fix = pass->repair_leaf(ip, leaf_no, lindex, *ref_count, msg, pass->private); if (fix < 0) return fix; } return 1; } static int u64cmp(const void *p1, const void *p2) { uint64_t a = *(uint64_t *)p1; uint64_t b = *(uint64_t *)p2; if (a > b) return 1; if (a < b) return -1; return 0; } static void dir_leaf_reada(struct gfs2_inode *ip, uint64_t *tbl, unsigned hsize) { uint64_t *t = alloca(hsize * sizeof(uint64_t)); uint64_t leaf_no; struct gfs2_sbd *sdp = ip->i_sbd; unsigned n = 0; unsigned i; for (i = 0; i < hsize; i++) { leaf_no = be64_to_cpu(tbl[i]); if (valid_block(ip->i_sbd, leaf_no)) t[n++] = leaf_no * sdp->bsize; } qsort(t, n, sizeof(uint64_t), u64cmp); for (i = 0; i < n; i++) posix_fadvise(sdp->device_fd, t[i], sdp->bsize, POSIX_FADV_WILLNEED); } /* Checks exhash directory entries */ static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass) { int error = 0; struct gfs2_leaf leaf; unsigned hsize = (1 << ip->i_di.di_depth); uint64_t leaf_no, leaf_next; uint64_t first_ok_leaf, orig_di_blocks; struct gfs2_buffer_head *lbh; int lindex; struct gfs2_sbd *sdp = ip->i_sbd; int ref_count, orig_ref_count, orig_di_depth, orig_di_height; uint64_t *tbl; int chained_leaf, tbl_valid; tbl = get_dir_hash(ip); if (tbl == NULL) { perror("get_dir_hash"); return -1; } tbl_valid = 1; orig_di_depth = ip->i_di.di_depth; orig_di_height = ip->i_di.di_height; orig_di_blocks = ip->i_di.di_blocks; /* Turn off system readahead */ posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_RANDOM); /* Readahead */ dir_leaf_reada(ip, tbl, hsize); if (pass->check_hash_tbl) { error = pass->check_hash_tbl(ip, tbl, hsize, pass->private); if (error < 0) { free(tbl); posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL); return error; } /* If hash table changes were made, read it in again. */ if (error) { free(tbl); tbl = get_dir_hash(ip); if (tbl == NULL) { perror("get_dir_hash"); return -1; } } } /* Find the first valid leaf pointer in range and use it as our "old" leaf. 
That way, bad blocks at the beginning will be overwritten with the first valid leaf. */ first_ok_leaf = leaf_no = -1; for (lindex = 0; lindex < hsize; lindex++) { leaf_no = be64_to_cpu(tbl[lindex]); if (valid_block(ip->i_sbd, leaf_no)) { lbh = bread(sdp, leaf_no); /* Make sure it's really a valid leaf block. */ if (gfs2_check_meta(lbh, GFS2_METATYPE_LF) == 0) { brelse(lbh); first_ok_leaf = leaf_no; break; } brelse(lbh); } } if (first_ok_leaf == -1) { /* no valid leaf found */ log_err( _("Directory #%llu (0x%llx) has no valid leaf " "blocks\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); free(tbl); posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL); return 1; } lindex = 0; leaf_next = -1; while (lindex < hsize) { int l; if (fsck_abort) break; if (!tbl_valid) { free(tbl); log_debug(_("Re-reading 0x%llx hash table.\n"), (unsigned long long)ip->i_di.di_num.no_addr); tbl = get_dir_hash(ip); if (tbl == NULL) { perror("get_dir_hash"); return -1; } tbl_valid = 1; orig_di_depth = ip->i_di.di_depth; orig_di_height = ip->i_di.di_height; orig_di_blocks = ip->i_di.di_blocks; } leaf_no = be64_to_cpu(tbl[lindex]); /* count the number of block pointers to this leaf. 
We don't need to count the current lindex, because we already know it's a reference */ ref_count = 1; for (l = lindex + 1; l < hsize; l++) { leaf_next = be64_to_cpu(tbl[l]); if (leaf_next != leaf_no) break; ref_count++; } orig_ref_count = ref_count; chained_leaf = 0; do { if (fsck_abort) { free(tbl); posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL); return 0; } error = check_leaf(ip, lindex, pass, &leaf_no, &leaf, &ref_count); if (ref_count != orig_ref_count) tbl_valid = 0; if (!leaf.lf_next || error) break; leaf_no = leaf.lf_next; chained_leaf++; log_debug( _("Leaf chain #%d (0x%llx) detected.\n"), chained_leaf, (unsigned long long)leaf_no); } while (1); /* while we have chained leaf blocks */ if (orig_di_depth != ip->i_di.di_depth) { log_debug(_("Depth of 0x%llx changed from %d to %d\n"), (unsigned long long)ip->i_di.di_num.no_addr, orig_di_depth, ip->i_di.di_depth); tbl_valid = 0; } if (orig_di_height != ip->i_di.di_height) { log_debug(_("Height of 0x%llx changed from %d to " "%d\n"), (unsigned long long)ip->i_di.di_num.no_addr, orig_di_height, ip->i_di.di_height); tbl_valid = 0; } if (orig_di_blocks != ip->i_di.di_blocks) { log_debug(_("Block count of 0x%llx changed from %llu " "to %llu\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)orig_di_blocks, (unsigned long long)ip->i_di.di_blocks); tbl_valid = 0; } lindex += ref_count; } /* for every leaf block */ free(tbl); posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL); return 0; } static int check_eattr_entries(struct gfs2_inode *ip, struct gfs2_buffer_head *bh, struct metawalk_fxns *pass) { struct gfs2_ea_header *ea_hdr, *ea_hdr_prev = NULL; uint64_t *ea_data_ptr = NULL; int i; int error = 0; uint32_t offset = (uint32_t)sizeof(struct gfs2_meta_header); if (!pass->check_eattr_entry) return 0; ea_hdr = (struct gfs2_ea_header *)(bh->b_data + sizeof(struct gfs2_meta_header)); while (1){ if (ea_hdr->ea_type == GFS2_EATYPE_UNUSED) error = 0; else error = pass->check_eattr_entry(ip, bh, 
ea_hdr, ea_hdr_prev, pass->private);
		if (error < 0) {
			stack;
			return -1;
		}
		/* Only headers that passed and own data block pointers get
		   their pointers checked */
		if (error == 0 && pass->check_eattr_extentry &&
		   ea_hdr->ea_num_ptrs) {
			uint32_t tot_ealen = 0;
			struct gfs2_sbd *sdp = ip->i_sbd;

			/* The pointers follow the header and the name, with
			   the name length rounded up to a multiple of 8 */
			ea_data_ptr = ((uint64_t *)((char *)ea_hdr +
						    sizeof(struct gfs2_ea_header) +
						    ((ea_hdr->ea_name_len + 7) & ~7)));

			/* It is possible when a EA is shrunk to have
			   ea_num_ptrs be greater than the number required
			   for data.  In this case, the EA code leaves the
			   blocks there for reuse. */
			for(i = 0; i < ea_hdr->ea_num_ptrs; i++){
				if (pass->check_eattr_extentry(ip,
							       ea_data_ptr,
							       bh, ea_hdr,
							       ea_hdr_prev,
							       pass->private)) {
					log_err(_("Bad extended attribute "
						  "found at block %lld "
						  "(0x%llx)"),
						(unsigned long long)
						be64_to_cpu(*ea_data_ptr),
						(unsigned long long)
						be64_to_cpu(*ea_data_ptr));
					if (query( _("Repair the bad Extended "
						     "Attribute? (y/n) "))) {
						/* Cut the EA off at the bad
						   pointer; this also ends the
						   enclosing for loop */
						ea_hdr->ea_num_ptrs = i;
						ea_hdr->ea_data_len =
							cpu_to_be32(tot_ealen);
						*ea_data_ptr = 0;
						bmodified(bh);
						/* Endianness doesn't matter
						   in this case because it's
						   a single byte.
						   (presumably refers to the
						   ea_num_ptrs store above -
						   TODO confirm) */
						fsck_blockmap_set(ip,
						       ip->i_di.di_eattr,
						       _("extended attribute"),
						       gfs2_meta_eattr);
						log_err( _("The EA was "
							   "fixed.\n"));
					} else {
						error = 1;
						log_err( _("The bad EA was "
							   "not fixed.\n"));
					}
				}
				/* Each data block holds one block's worth of
				   payload minus its metadata header */
				tot_ealen += sdp->sd_sb.sb_bsize -
					sizeof(struct gfs2_meta_header);
				ea_data_ptr++;
			}
		}
		offset += be32_to_cpu(ea_hdr->ea_rec_len);
		/* Stop at the last header, at the end of the block, or on a
		   zero record length (which would never advance) */
		if (ea_hdr->ea_flags & GFS2_EAFLAG_LAST ||
		   offset >= ip->i_sbd->sd_sb.sb_bsize || ea_hdr->ea_rec_len == 0){
			break;
		}
		ea_hdr_prev = ea_hdr;
		ea_hdr = (struct gfs2_ea_header *)
			((char *)(ea_hdr) +
			 be32_to_cpu(ea_hdr->ea_rec_len));
	}

	return error;
}

/**
 * check_leaf_eattr - check one extended attribute leaf block
 * @ip: the inode the eattr comes from
 * @block: block number of the leaf
 * @parent: block that refers to this leaf, passed to the callback
 * @pass: pass-specific callbacks
 *
 * Does nothing unless pass->check_eattr_leaf is set.  If the callback
 * accepts the block and hands back a buffer, the entries inside it are
 * checked with check_eattr_entries() and the buffer is released.
 *
 * Returns: 0 on success, 1 if removal is needed, -1 on error
 */
static int check_leaf_eattr(struct gfs2_inode *ip, uint64_t block,
			    uint64_t parent, struct metawalk_fxns *pass)
{
	struct gfs2_buffer_head *bh = NULL;

	if (pass->check_eattr_leaf) {
		int error = 0;

		log_debug( _("Checking EA leaf block #%llu (0x%llx).\n"),
			   (unsigned long long)block,
			   (unsigned long long)block);

		error = pass->check_eattr_leaf(ip, block, parent, &bh,
					       pass->private);
		if (error < 0) {
			stack;
			return -1;
		}
		if (error > 0) {
			if (bh)
				brelse(bh);
			return 1;
		}
		if (bh) {
			error = check_eattr_entries(ip, bh, pass);
			brelse(bh);
		}
		return error;
	}

	return 0;
}

/**
 * delete_block - delete a block associated with an inode
 * @ip: the inode the block belongs to
 * @block: the block to mark free in the fsck blockmap
 * @bh: unused here
 * @btype: description of the block type, used in messages
 * @private: unused here
 *
 * Returns: 0 if the block was in range and marked free, -1 otherwise
 */
int delete_block(struct gfs2_inode *ip, uint64_t block,
		 struct gfs2_buffer_head **bh, const char *btype,
		 void *private)
{
	if (valid_block(ip->i_sbd, block)) {
		fsck_blockmap_set(ip, block, btype, gfs2_block_free);
		return 0;
	}
	return -1;
}

/**
 * find_remove_dup - find out if this is a duplicate ref.  If so, remove it.
 *
 * Returns: 1 if there are any remaining references to this block, else 0.
 */
int find_remove_dup(struct gfs2_inode *ip, uint64_t block, const char *btype)
{
	struct duptree *dt;
	struct inode_with_dups *id;

	dt = dupfind(block);
	if (!dt)
		return 0;

	/* remove the inode reference id structure for this reference.
*/ id = find_dup_ref_inode(dt, ip); if (!id) goto more_refs; dup_listent_delete(dt, id); if (dt->refs == 0) { log_info( _("This was the last reference: it's no longer a " "duplicate.\n")); dup_delete(dt); /* not duplicate now */ return 0; } more_refs: log_info( _("%d block reference(s) remain.\n"), dt->refs); return 1; /* references still exist so do not free the block. */ } /** * delete_block_if_notdup - delete blocks associated with an inode * * Ignore blocks that are already marked free. * If it has been identified as duplicate, remove the duplicate reference. * If all duplicate references have been removed, delete the block. */ static int delete_block_if_notdup(struct gfs2_inode *ip, uint64_t block, struct gfs2_buffer_head **bh, const char *btype, int *was_duplicate, void *private) { uint8_t q; if (!valid_block(ip->i_sbd, block)) return meta_error; q = block_type(block); if (q == gfs2_block_free) { log_info( _("%s block %lld (0x%llx), part of inode " "%lld (0x%llx), was already free.\n"), btype, (unsigned long long)block, (unsigned long long)block, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); return meta_is_good; } if (find_remove_dup(ip, block, btype)) { /* a dup */ if (was_duplicate) *was_duplicate = 1; log_err( _("Not clearing duplicate reference in inode " "at block #%llu (0x%llx) to block #%llu (0x%llx) " "because it's referenced by another inode.\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)block, (unsigned long long)block); } else { fsck_blockmap_set(ip, block, btype, gfs2_block_free); } return meta_is_good; } /** * check_indirect_eattr * @ip: the inode the eattr comes from * @indirect_block * * Returns: 0 on success -1 on error */ static int check_indirect_eattr(struct gfs2_inode *ip, uint64_t indirect, struct metawalk_fxns *pass) { int error = 0; uint64_t *ea_leaf_ptr, *end; uint64_t block; struct gfs2_buffer_head *indirect_buf = NULL; 
struct gfs2_sbd *sdp = ip->i_sbd; int first_ea_is_bad = 0; uint64_t di_eattr_save = ip->i_di.di_eattr; uint64_t offset = ip->i_sbd->gfs1 ? sizeof(struct gfs_indirect) : sizeof(struct gfs2_meta_header); log_debug( _("Checking EA indirect block #%llu (0x%llx).\n"), (unsigned long long)indirect, (unsigned long long)indirect); if (!pass->check_eattr_indir) return 0; error = pass->check_eattr_indir(ip, indirect, ip->i_di.di_num.no_addr, &indirect_buf, pass->private); if (!error) { int leaf_pointers = 0, leaf_pointer_errors = 0; ea_leaf_ptr = (uint64_t *)(indirect_buf->b_data + offset); end = ea_leaf_ptr + ((sdp->sd_sb.sb_bsize - offset) / 8); while (*ea_leaf_ptr && (ea_leaf_ptr < end)){ block = be64_to_cpu(*ea_leaf_ptr); leaf_pointers++; error = check_leaf_eattr(ip, block, indirect, pass); if (error) { leaf_pointer_errors++; if (query( _("Fix the indirect " "block too? (y/n) "))) *ea_leaf_ptr = 0; } /* If the first eattr lead is bad, we can't have a hole, so we have to treat this as an unrecoverable eattr error and delete all eattr info. Calling finish_eattr_indir here causes ip->i_di.di_eattr = 0 and that ensures that subsequent calls to check_leaf_eattr result in the eattr check_leaf_block nuking them all "due to previous errors" */ if (leaf_pointers == 1 && leaf_pointer_errors == 1) { first_ea_is_bad = 1; if (pass->finish_eattr_indir) pass->finish_eattr_indir(ip, leaf_pointers, leaf_pointer_errors, pass->private); } else if (leaf_pointer_errors) { /* This is a bit tricky. We can't have eattr holes. So if we have 4 good eattrs, 1 bad eattr and 5 more good ones: GGGGBGGGGG, we need to tell check_leaf_eattr to delete all eattrs after the bad one. So we want: GGGG when we finish. To do that, we set di_eattr to 0 temporarily. */ ip->i_di.di_eattr = 0; bmodified(ip->i_bh); } ea_leaf_ptr++; } if (pass->finish_eattr_indir) { if (!first_ea_is_bad) { /* If the first ea is good but subsequent ones were bad and deleted, we need to restore the saved di_eattr block. 
*/ if (leaf_pointer_errors) ip->i_di.di_eattr = di_eattr_save; pass->finish_eattr_indir(ip, leaf_pointers, leaf_pointer_errors, pass->private); } if (leaf_pointer_errors && leaf_pointer_errors == leaf_pointers) { delete_block(ip, indirect, NULL, "leaf", NULL); error = 1; } } } if (indirect_buf) brelse(indirect_buf); return error; } /** * check_inode_eattr - check the EA's for a single inode * @ip: the inode whose EA to check * * Returns: 0 on success, -1 on error */ int check_inode_eattr(struct gfs2_inode *ip, struct metawalk_fxns *pass) { int error = 0; if (!ip->i_di.di_eattr) return 0; log_debug( _("Extended attributes exist for inode #%llu (0x%llx).\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT){ if ((error = check_indirect_eattr(ip, ip->i_di.di_eattr, pass))) stack; } else { error = check_leaf_eattr(ip, ip->i_di.di_eattr, ip->i_di.di_num.no_addr, pass); if (error) stack; } return error; } /** * free_metalist - free all metadata on a multi-level metadata list */ static void free_metalist(struct gfs2_inode *ip, osi_list_t *mlp) { int i; struct gfs2_buffer_head *nbh; for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) { osi_list_t *list; list = &mlp[i]; while (!osi_list_empty(list)) { nbh = osi_list_entry(list->next, struct gfs2_buffer_head, b_altlist); if (nbh == ip->i_bh) osi_list_del(&nbh->b_altlist); else brelse(nbh); } } } /** * build_and_check_metalist - check a bunch of indirect blocks * This includes hash table blocks for directories * which are technically "data" in the bitmap. 
*
 * Returns: 0 - all is well, process the blocks this metadata references
 *          1 - something went wrong, but process the sub-blocks anyway
 *         -1 - something went wrong, so don't process the sub-blocks
 *          (In the code below the non-zero results are whatever
 *          pass->check_metalist reported as meta_error or
 *          meta_skip_further.)
 * @ip: the inode whose metadata tree is walked
 * @mlp: array of list heads, one per height; mlp[0] receives the inode's
 *       own buffer and mlp[h] the buffers found at height h
 * @pass: pass-specific callbacks; check_metalist is called for every
 *        non-zero pointer
 */
static int build_and_check_metalist(struct gfs2_inode *ip, osi_list_t *mlp,
				    struct metawalk_fxns *pass)
{
	uint32_t height = ip->i_di.di_height;
	struct gfs2_buffer_head *bh, *nbh, *metabh = ip->i_bh;
	osi_list_t *prev_list, *cur_list, *tmp;
	int h, head_size, iblk_type;
	uint64_t *ptr, block;
	int error, was_duplicate, is_valid;

	/* The dinode's buffer is the single entry at height 0 */
	osi_list_add(&metabh->b_altlist, &mlp[0]);

	/* Directories are special.  Their 'data' is the hash table, which is
	   basically an indirect block list. Their height is not important
	   because it checks everything through the hash table using
	   "depth" field calculations. However, we still have to check the
	   indirect blocks, even if the height == 1.  */
	if (is_dir(&ip->i_di, ip->i_sbd->gfs1))
		height++;

	/* if (there are no indirect levels to walk) */
	if (height < 2)
		return meta_is_good;
	for (h = 1; h < height; h++) {
		/* Work out what kind of block the previous level must hold
		   and how big its header is */
		if (h > 1) {
			if (is_dir(&ip->i_di, ip->i_sbd->gfs1) &&
			    h == ip->i_di.di_height + 1)
				iblk_type = GFS2_METATYPE_JD;
			else
				iblk_type = GFS2_METATYPE_IN;
			if (ip->i_sbd->gfs1)
				head_size = sizeof(struct gfs_indirect);
			else
				head_size = sizeof(struct gfs2_meta_header);
		} else {
			iblk_type = GFS2_METATYPE_DI;
			head_size = sizeof(struct gfs2_dinode);
		}
		prev_list = &mlp[h - 1];
		cur_list = &mlp[h];

		for (tmp = prev_list->next; tmp != prev_list; tmp = tmp->next){
			bh = osi_list_entry(tmp, struct gfs2_buffer_head,
					    b_altlist);
			/* Blocks of the wrong type are silently skipped */
			if (gfs2_check_meta(bh, iblk_type))
				continue;

			/* Now check the metadata itself */
			for (ptr = (uint64_t *)(bh->b_data + head_size);
			     (char *)ptr < (bh->b_data + ip->i_sbd->bsize);
			     ptr++) {
				if (skip_this_pass || fsck_abort) {
					free_metalist(ip, mlp);
					return meta_is_good;
				}
				nbh = NULL;

				if (!*ptr)
					continue;

				block = be64_to_cpu(*ptr);
				was_duplicate = 0;
				error = pass->check_metalist(ip, block, &nbh,
							     h, &is_valid,
							     &was_duplicate,
							     pass->private);
				/* check_metalist should hold any buffers
				   it gets with "bread". */
				if (error == meta_error) {
					stack;
					log_info(_("\nSerious metadata "
						   "error on block %llu "
						   "(0x%llx).\n"),
						 (unsigned long long)block,
						 (unsigned long long)block);
					return error;
				}
				if (error == meta_skip_further) {
					log_info(_("\nUnrecoverable metadata "
						   "error on block %llu "
						   "(0x%llx). Further metadata"
						   " will be skipped.\n"),
						 (unsigned long long)block,
						 (unsigned long long)block);
					return error;
				}
				if (!is_valid) {
					log_debug( _("Skipping rejected block "
						     "%llu (0x%llx)\n"),
						   (unsigned long long)block,
						   (unsigned long long)block);
					continue;
				}
				if (was_duplicate) {
					log_debug( _("Skipping duplicate %llu "
						     "(0x%llx)\n"),
						   (unsigned long long)block,
						   (unsigned long long)block);
					continue;
				}
				if (!valid_block(ip->i_sbd, block)) {
					log_debug( _("Skipping invalid block "
						     "%lld (0x%llx)\n"),
						   (unsigned long long)block,
						   (unsigned long long)block);
					continue;
				}
				/* Read the block ourselves if the callback
				   did not hand back a buffer, then queue it
				   for the next height */
				if (!nbh)
					nbh = bread(ip->i_sbd, block);
				osi_list_add_prev(&nbh->b_altlist, cur_list);
			} /* for all data on the indirect block */
		} /* for blocks at that height */
	} /* for height */
	return 0;
}

/**
 * check_data - check all data pointers for a given buffer
 *              This does not include "data" blocks that are really
 *              hash table blocks for directories.
 *
 * @ip:
 *
 * returns: +ENOENT if there are too many bad pointers
 *          -1 if a more serious error occurred.
*           0 if no errors occurred
 *          1 if errors were found and corrected
 *          2 (ENOENT) if there were too many bad pointers
 *
 * As coded: the first non-zero result of pass->check_data is remembered and
 * returned at the end; a negative result ends the walk immediately and the
 * offending block is stored in *error_blk.
 */
static int check_data(struct gfs2_inode *ip, struct metawalk_fxns *pass,
		      struct gfs2_buffer_head *bh, int head_size,
		      uint64_t *blks_checked, uint64_t *error_blk)
{
	int error = 0, rc = 0;
	uint64_t block, *ptr;
	uint64_t *ptr_start = (uint64_t *)(bh->b_data + head_size);
	char *ptr_end = (bh->b_data + ip->i_sbd->bsize);
	uint64_t metablock = bh->b_blocknr;

	/* If there isn't much pointer corruption check the pointers */
	for (ptr = ptr_start ; (char *)ptr < ptr_end && !fsck_abort; ptr++) {
		if (!*ptr)
			continue;

		if (skip_this_pass || fsck_abort)
			return error;
		block =  be64_to_cpu(*ptr);
		/* It's important that we don't call valid_block() and
		   bypass calling check_data on invalid blocks because that
		   would defeat the rangecheck_block related functions in
		   pass1. Therefore the individual check_data functions
		   should do a range check. */
		rc = pass->check_data(ip, metablock, block, pass->private);
		/* Only the first failure is reported and remembered */
		if (!error && rc) {
			error = rc;
			log_info("\n");
			if (rc < 0) {
				*error_blk = block;
				log_info(_("Unrecoverable "));
			}
			log_info(_("data block error %d on block %llu "
				   "(0x%llx).\n"), rc,
				 (unsigned long long)block,
				 (unsigned long long)block);
		}
		if (rc < 0)
			return rc;
		(*blks_checked)++;
	}
	return error;
}

/*
 * undo_check_data - call pass->undo_check_data for a range of data pointers
 *
 * Walks the pointers from ptr_start up to ptr_end and stops as soon as it
 * reaches error_blk.
 *
 * returns: 0 if the whole range was undone
 *          1 if error_blk was reached, or the pass is skipped/aborted
 *          a negative value if the callback failed
 */
static int undo_check_data(struct gfs2_inode *ip, struct metawalk_fxns *pass,
			   uint64_t *ptr_start, char *ptr_end,
			   uint64_t error_blk)
{
	int rc = 0;
	uint64_t block, *ptr;

	/* If there isn't much pointer corruption check the pointers */
	for (ptr = ptr_start ; (char *)ptr < ptr_end && !fsck_abort; ptr++) {
		if (!*ptr)
			continue;

		if (skip_this_pass || fsck_abort)
			return 1;
		block =  be64_to_cpu(*ptr);
		if (block == error_blk)
			return 1;
		rc = pass->undo_check_data(ip, block, pass->private);
		if (rc < 0)
			return rc;
	}
	return 0;
}

/*
 * hdr_size - size of the header in front of the pointers in a buffer
 *
 * For height > 1 the buffer must be an indirect block, otherwise a dinode.
 *
 * returns: the header size in bytes, or 0 if the buffer does not hold the
 *          expected metadata type
 */
static int hdr_size(struct gfs2_buffer_head *bh, int height)
{
	if (height > 1) {
		if (gfs2_check_meta(bh, GFS2_METATYPE_IN))
			return 0;
		if (bh->sdp->gfs1)
			return sizeof(struct gfs_indirect);
		else
			return sizeof(struct gfs2_meta_header);
	}
	/* if this isn't really a dinode, skip it */
	if (gfs2_check_meta(bh, GFS2_METATYPE_DI))
		return 0;

	return sizeof(struct gfs2_dinode);
}

/**
 * check_metatree
 * @ip: inode structure in memory
 * @pass: structure passed in from caller to determine the sub-functions
 *
 * Builds and checks the inode's metadata lists, then checks either the
 * directory leaf blocks or the data pointers found at the lowest height.
 * When that fails the user is asked whether to remove the inode; if so the
 * work already done for its blocks is undone.
 *
 * Returns: 0 if nothing was wrong (or on abort), otherwise the error that
 *          was hit
 */
int check_metatree(struct gfs2_inode *ip, struct metawalk_fxns *pass)
{
	osi_list_t metalist[GFS2_MAX_META_HEIGHT];
	osi_list_t *list, *tmp;
	struct gfs2_buffer_head *bh;
	uint32_t height = ip->i_di.di_height;
	int  i, head_size;
	uint64_t blks_checked = 0;
	int error, rc;
	int metadata_clean = 0;
	uint64_t error_blk = 0;
	int hit_error_blk = 0;

	/* A stuffed non-directory has no metadata tree to walk */
	if (!height && !is_dir(&ip->i_di, ip->i_sbd->gfs1))
		return 0;

	for (i = 0; i < GFS2_MAX_META_HEIGHT; i++)
		osi_list_init(&metalist[i]);

	/* create and check the metadata list for each height */
	error = build_and_check_metalist(ip, &metalist[0], pass);
	if (error) {
		stack;
		goto undo_metalist;
	}

	metadata_clean = 1;
	/* For directories, we've already checked the "data" blocks which
	 * comprise the directory hash table, so we perform the directory
	 * checks and exit. */
	if (is_dir(&ip->i_di, ip->i_sbd->gfs1)) {
		if (!(ip->i_di.di_flags & GFS2_DIF_EXHASH))
			goto out;
		/* check validity of leaf blocks and leaf chains */
		error = check_leaf_blks(ip, pass);
		if (error)
			goto undo_metalist;
		goto out;
	}

	/* check data blocks */
	list = &metalist[height - 1];
	if (ip->i_di.di_blocks > COMFORTABLE_BLKS)
		last_reported_fblock = -10000000;

	/* The loop stops at the first buffer whose data check fails */
	for (tmp = list->next; !error && tmp != list; tmp = tmp->next) {
		if (fsck_abort) {
			free_metalist(ip, &metalist[0]);
			return 0;
		}
		bh = osi_list_entry(tmp, struct gfs2_buffer_head, b_altlist);
		head_size = hdr_size(bh, height);
		if (!head_size)
			continue;

		if (pass->check_data)
			error = check_data(ip, pass, bh, head_size,
					   &blks_checked, &error_blk);
		if (pass->big_file_msg && ip->i_di.di_blocks > COMFORTABLE_BLKS)
			pass->big_file_msg(ip, blks_checked);
	}
	if (pass->big_file_msg && ip->i_di.di_blocks > COMFORTABLE_BLKS) {
		log_notice( _("\rLarge file at %lld (0x%llx) - 100 percent "
			      "complete. "
			      "\n"),
			   (unsigned long long)ip->i_di.di_num.no_addr,
			   (unsigned long long)ip->i_di.di_num.no_addr);
		fflush(stdout);
	}
undo_metalist:
	if (!error)
		goto out;
	log_err( _("Error: inode %llu (0x%llx) had unrecoverable errors.\n"),
		 (unsigned long long)ip->i_di.di_num.no_addr,
		 (unsigned long long)ip->i_di.di_num.no_addr);
	if (!query( _("Remove the invalid inode? (y/n) "))) {
		free_metalist(ip, &metalist[0]);
		log_err(_("Invalid inode not deleted.\n"));
		return error;
	}
	/* Undo, height by height, what was recorded for the inode's blocks.
	   Nothing is undone when the pass has no undo_check_meta callback. */
	for (i = 0; pass->undo_check_meta && i < height; i++) {
		while (!osi_list_empty(&metalist[i])) {
			list = &metalist[i];
			bh = osi_list_entry(list->next,
					    struct gfs2_buffer_head,
					    b_altlist);
			log_err(_("Undoing metadata work for block %llu "
				  "(0x%llx)\n"),
				(unsigned long long)bh->b_blocknr,
				(unsigned long long)bh->b_blocknr);
			/* Height 0 is the dinode itself: no callback */
			if (i)
				rc = pass->undo_check_meta(ip, bh->b_blocknr,
							   i, pass->private);
			else
				rc = 0;
			/* Data pointers are undone only at the lowest height
			   and only until the block that failed is reached */
			if (metadata_clean && rc == 0 && i == height - 1 &&
			    !hit_error_blk) {
				head_size = hdr_size(bh, height);
				if (head_size) {
					rc = undo_check_data(ip, pass,
							     (uint64_t *)
					      (bh->b_data + head_size),
					      (bh->b_data + ip->i_sbd->bsize),
							     error_blk);
					if (rc > 0) {
						hit_error_blk = 1;
						rc = 0;
					}
				}
			}
			/* Taking the buffer off the list is what lets the
			   enclosing while loop make progress */
			if (bh == ip->i_bh)
				osi_list_del(&bh->b_altlist);
			else
				brelse(bh);
		}
	}
	/* There may be leftover duplicate records, so we need to delete them.
	   For example, if a metadata block was found to be a duplicate, we
	   may not have added it to the metalist, which means it's not there
	   to undo.
*/ delete_all_dups(ip); /* Set the dinode as "bad" so it gets deleted */ fsck_blockmap_set(ip, ip->i_di.di_num.no_addr, _("corrupt"), gfs2_block_free); log_err(_("The corrupt inode was invalidated.\n")); out: free_metalist(ip, &metalist[0]); return error; } /* Checks stuffed inode directories */ int check_linear_dir(struct gfs2_inode *ip, struct gfs2_buffer_head *bh, struct metawalk_fxns *pass) { int error = 0; uint32_t count = 0; error = check_entries(ip, bh, DIR_LINEAR, &count, 0, pass); if (error < 0) { stack; return -1; } return error; } int check_dir(struct gfs2_sbd *sdp, uint64_t block, struct metawalk_fxns *pass) { struct gfs2_inode *ip; int error = 0; uint64_t cur_blks; ip = fsck_load_inode(sdp, block); cur_blks = ip->i_di.di_blocks; if (ip->i_di.di_flags & GFS2_DIF_EXHASH) error = check_leaf_blks(ip, pass); else error = check_linear_dir(ip, ip->i_bh, pass); if (error < 0) stack; if (ip->i_di.di_blocks != cur_blks) reprocess_inode(ip, _("Current")); fsck_inode_put(&ip); /* does a brelse */ return error; } static int remove_dentry(struct gfs2_inode *ip, struct gfs2_dirent *dent, struct gfs2_dirent *prev_de, struct gfs2_buffer_head *bh, char *filename, uint32_t *count, int lindex, void *private) { /* the metawalk_fxn's private field must be set to the dentry * block we want to clear */ uint64_t *dentryblock = (uint64_t *) private; struct gfs2_dirent dentry, *de; memset(&dentry, 0, sizeof(struct gfs2_dirent)); gfs2_dirent_in(&dentry, (char *)dent); de = &dentry; if (de->de_inum.no_addr == *dentryblock) dirent2_del(ip, bh, prev_de, dent); else (*count)++; return 0; } int remove_dentry_from_dir(struct gfs2_sbd *sdp, uint64_t dir, uint64_t dentryblock) { struct metawalk_fxns remove_dentry_fxns = {0}; uint8_t q; int error; log_debug( _("Removing dentry %llu (0x%llx) from directory %llu" " (0x%llx)\n"), (unsigned long long)dentryblock, (unsigned long long)dentryblock, (unsigned long long)dir, (unsigned long long)dir); if (!valid_block(sdp, dir)) { log_err( 
_("Parent directory is invalid\n")); return 1; } remove_dentry_fxns.private = &dentryblock; remove_dentry_fxns.check_dentry = remove_dentry; q = block_type(dir); if (q != gfs2_inode_dir) { log_info( _("Parent block is not a directory...ignoring\n")); return 1; } /* Need to run check_dir with a private var of dentryblock, * and fxns that remove that dentry if found */ error = check_dir(sdp, dir, &remove_dentry_fxns); return error; } int delete_metadata(struct gfs2_inode *ip, uint64_t block, struct gfs2_buffer_head **bh, int h, int *is_valid, int *was_duplicate, void *private) { *is_valid = 1; *was_duplicate = 0; return delete_block_if_notdup(ip, block, bh, _("metadata"), was_duplicate, private); } int delete_leaf(struct gfs2_inode *ip, uint64_t block, void *private) { return delete_block_if_notdup(ip, block, NULL, _("leaf"), NULL, private); } int delete_data(struct gfs2_inode *ip, uint64_t metablock, uint64_t block, void *private) { return delete_block_if_notdup(ip, block, NULL, _("data"), NULL, private); } static int del_eattr_generic(struct gfs2_inode *ip, uint64_t block, uint64_t parent, struct gfs2_buffer_head **bh, void *private, const char *eatype) { int ret = 0; int was_free = 0; uint8_t q; if (valid_block(ip->i_sbd, block)) { q = block_type(block); if (q == gfs2_block_free) was_free = 1; ret = delete_block_if_notdup(ip, block, NULL, eatype, NULL, private); if (!ret) { *bh = bread(ip->i_sbd, block); if (!was_free) ip->i_di.di_blocks--; bmodified(ip->i_bh); } } /* Even if it's a duplicate reference, we want to eliminate the reference itself, and adjust di_blocks accordingly. 
*/ if (ip->i_di.di_eattr) { if (block == ip->i_di.di_eattr) ip->i_di.di_eattr = 0; bmodified(ip->i_bh); } return ret; } int delete_eattr_indir(struct gfs2_inode *ip, uint64_t block, uint64_t parent, struct gfs2_buffer_head **bh, void *private) { return del_eattr_generic(ip, block, parent, bh, private, _("extended attribute")); } int delete_eattr_leaf(struct gfs2_inode *ip, uint64_t block, uint64_t parent, struct gfs2_buffer_head **bh, void *private) { return del_eattr_generic(ip, block, parent, bh, private, _("indirect extended attribute")); } int delete_eattr_entry(struct gfs2_inode *ip, struct gfs2_buffer_head *leaf_bh, struct gfs2_ea_header *ea_hdr, struct gfs2_ea_header *ea_hdr_prev, void *private) { struct gfs2_sbd *sdp = ip->i_sbd; char ea_name[256]; uint32_t avail_size; int max_ptrs; if (!ea_hdr->ea_name_len){ /* Skip this entry for now */ return 1; } memset(ea_name, 0, sizeof(ea_name)); strncpy(ea_name, (char *)ea_hdr + sizeof(struct gfs2_ea_header), ea_hdr->ea_name_len); if (!GFS2_EATYPE_VALID(ea_hdr->ea_type) && ((ea_hdr_prev) || (!ea_hdr_prev && ea_hdr->ea_type))){ /* Skip invalid entry */ return 1; } if (!ea_hdr->ea_num_ptrs) return 0; avail_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); max_ptrs = (be32_to_cpu(ea_hdr->ea_data_len) + avail_size - 1) / avail_size; if (max_ptrs > ea_hdr->ea_num_ptrs) return 1; log_debug( _(" Pointers Required: %d\n Pointers Reported: %d\n"), max_ptrs, ea_hdr->ea_num_ptrs); return 0; } int delete_eattr_extentry(struct gfs2_inode *ip, uint64_t *ea_data_ptr, struct gfs2_buffer_head *leaf_bh, struct gfs2_ea_header *ea_hdr, struct gfs2_ea_header *ea_hdr_prev, void *private) { uint64_t block = be64_to_cpu(*ea_data_ptr); return delete_block_if_notdup(ip, block, NULL, _("extended attribute"), NULL, private); } static int alloc_metalist(struct gfs2_inode *ip, uint64_t block, struct gfs2_buffer_head **bh, int h, int *is_valid, int *was_duplicate, void *private) { uint8_t q; const char *desc = (const char *)private; 
/* No need to range_check here--if it was added, it's in range. */ /* We can't check the bitmap here because this function is called after the bitmap has been set but before the blockmap has. */ *is_valid = 1; *was_duplicate = 0; *bh = bread(ip->i_sbd, block); q = block_type(block); if (blockmap_to_bitmap(q, ip->i_sbd->gfs1) == GFS2_BLKST_FREE) { log_debug(_("%s reference to new metadata block " "%lld (0x%llx) is now marked as indirect.\n"), desc, (unsigned long long)block, (unsigned long long)block); gfs2_blockmap_set(bl, block, gfs2_indir_blk); } return meta_is_good; } static int alloc_data(struct gfs2_inode *ip, uint64_t metablock, uint64_t block, void *private) { uint8_t q; const char *desc = (const char *)private; /* No need to range_check here--if it was added, it's in range. */ /* We can't check the bitmap here because this function is called after the bitmap has been set but before the blockmap has. */ q = block_type(block); if (blockmap_to_bitmap(q, ip->i_sbd->gfs1) == GFS2_BLKST_FREE) { log_debug(_("%s reference to new data block " "%lld (0x%llx) is now marked as data.\n"), desc, (unsigned long long)block, (unsigned long long)block); gfs2_blockmap_set(bl, block, gfs2_block_used); } return 0; } static int alloc_leaf(struct gfs2_inode *ip, uint64_t block, void *private) { uint8_t q; /* No need to range_check here--if it was added, it's in range. */ /* We can't check the bitmap here because this function is called after the bitmap has been set but before the blockmap has. 
*/ q = block_type(block); if (blockmap_to_bitmap(q, ip->i_sbd->gfs1) == GFS2_BLKST_FREE) fsck_blockmap_set(ip, block, _("newly allocated leaf"), gfs2_leaf_blk); return 0; } struct metawalk_fxns alloc_fxns = { .private = NULL, .check_leaf = alloc_leaf, .check_metalist = alloc_metalist, .check_data = alloc_data, .check_eattr_indir = NULL, .check_eattr_leaf = NULL, .check_dentry = NULL, .check_eattr_entry = NULL, .check_eattr_extentry = NULL, .finish_eattr_indir = NULL, }; /* * reprocess_inode - fixes the blockmap to match the bitmap due to an * unexpected block allocation via libgfs2. * * The problem we're trying to overcome here is when a new block must be * added to a dinode because of a write. This will happen when lost+found * needs a new indirect block for its hash table. In that case, the write * causes a new block to be assigned in the bitmap but that block is not yet * accurately reflected in the fsck blockmap. We need to compensate here. * * We can't really use fsck_blockmap_set here because the new block * was already allocated by libgfs2 and therefore it took care of * the rgrp free space variable. fsck_blockmap_set adjusts the free space * in the rgrp according to the change, which has already been done. * So it's only our blockmap that now disagrees with the rgrp bitmap, so we * need to fix only that. 
*/ void reprocess_inode(struct gfs2_inode *ip, const char *desc) { int error; alloc_fxns.private = (void *)desc; log_info( _("%s inode %llu (0x%llx) had blocks added; reprocessing " "its metadata tree at height=%d.\n"), desc, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, ip->i_di.di_height); error = check_metatree(ip, &alloc_fxns); if (error) log_err( _("Error %d reprocessing the %s metadata tree.\n"), error, desc); } /* * write_new_leaf - allocate and write a new leaf to cover a gap in hash table * @dip: the directory inode * @start_lindex: where in the hash table to start writing * @num_copies: number of copies of the pointer to write into hash table * @before_or_after: desc. of whether this is being added before/after/etc. * @bn: pointer to return the newly allocated leaf's block number */ int write_new_leaf(struct gfs2_inode *dip, int start_lindex, int num_copies, const char *before_or_after, uint64_t *bn) { struct gfs2_buffer_head *nbh; struct gfs2_leaf *leaf; struct gfs2_dirent *dent; int count, i; int factor = 0, pad_size; uint64_t *cpyptr; char *padbuf; int divisor = num_copies; int end_lindex = start_lindex + num_copies; padbuf = malloc(num_copies * sizeof(uint64_t)); /* calculate the depth needed for the new leaf */ while (divisor > 1) { factor++; divisor /= 2; } /* Make sure the number of copies is properly a factor of 2 */ if ((1 << factor) != num_copies) { log_err(_("Program error: num_copies not a factor of 2.\n")); log_err(_("num_copies=%d, dinode = %lld (0x%llx)\n"), num_copies, (unsigned long long)dip->i_di.di_num.no_addr, (unsigned long long)dip->i_di.di_num.no_addr); log_err(_("lindex = %d (0x%x)\n"), start_lindex, start_lindex); stack; free(padbuf); return -1; } /* allocate and write out a new leaf block */ *bn = meta_alloc(dip); fsck_blockmap_set(dip, *bn, _("directory leaf"), gfs2_leaf_blk); log_err(_("A new directory leaf was allocated at block %lld " "(0x%llx) to fill the %d (0x%x) pointer gap %s 
the existing " "pointer at index %d (0x%x).\n"), (unsigned long long)*bn, (unsigned long long)*bn, num_copies, num_copies, before_or_after, start_lindex, start_lindex); dip->i_di.di_blocks++; bmodified(dip->i_bh); nbh = bget(dip->i_sbd, *bn); memset(nbh->b_data, 0, dip->i_sbd->bsize); leaf = (struct gfs2_leaf *)nbh->b_data; leaf->lf_header.mh_magic = cpu_to_be32(GFS2_MAGIC); leaf->lf_header.mh_type = cpu_to_be32(GFS2_METATYPE_LF); leaf->lf_header.mh_format = cpu_to_be32(GFS2_FORMAT_LF); leaf->lf_depth = cpu_to_be16(dip->i_di.di_depth - factor); /* initialize the first dirent on the new leaf block */ dent = (struct gfs2_dirent *)(nbh->b_data + sizeof(struct gfs2_leaf)); dent->de_rec_len = cpu_to_be16(dip->i_sbd->bsize - sizeof(struct gfs2_leaf)); bmodified(nbh); brelse(nbh); /* pad the hash table with the new leaf block */ cpyptr = (uint64_t *)padbuf; for (i = start_lindex; i < end_lindex; i++) { *cpyptr = cpu_to_be64(*bn); cpyptr++; } pad_size = num_copies * sizeof(uint64_t); log_err(_("Writing to the hash table of directory %lld " "(0x%llx) at index: 0x%x for 0x%lx pointers.\n"), (unsigned long long)dip->i_di.di_num.no_addr, (unsigned long long)dip->i_di.di_num.no_addr, start_lindex, pad_size / sizeof(uint64_t)); if (dip->i_sbd->gfs1) count = gfs1_writei(dip, padbuf, start_lindex * sizeof(uint64_t), pad_size); else count = gfs2_writei(dip, padbuf, start_lindex * sizeof(uint64_t), pad_size); free(padbuf); if (count != pad_size) { log_err( _("Error: bad write while fixing directory leaf " "pointers.\n")); return -1; } return 0; } /* repair_leaf - Warn the user of an error and ask permission to fix it * Process a bad leaf pointer and ask to repair the first time. * The repair process involves extending the previous leaf's entries * so that they replace the bad ones. We have to hack up the old * leaf a bit, but it's better than deleting the whole directory, * which is what used to happen before. 
*/ int repair_leaf(struct gfs2_inode *ip, uint64_t *leaf_no, int lindex, int ref_count, const char *msg, int allow_alloc) { int new_leaf_blks = 0, error, refs; uint64_t bn = 0; log_err( _("Directory Inode %llu (0x%llx) points to leaf %llu" " (0x%llx) %s.\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)*leaf_no, (unsigned long long)*leaf_no, msg); if (!query( _("Attempt to patch around it? (y/n) "))) { log_err( _("Bad leaf left in place.\n")); goto out; } if (!allow_alloc) { uint64_t *cpyptr; char *padbuf; int pad_size, i; padbuf = malloc(ref_count * sizeof(uint64_t)); cpyptr = (uint64_t *)padbuf; for (i = 0; i < ref_count; i++) { *cpyptr = 0; cpyptr++; } pad_size = ref_count * sizeof(uint64_t); log_err(_("Writing zeros to the hash table of directory %lld " "(0x%llx) at index: 0x%x for 0x%x pointers.\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, lindex, ref_count); if (ip->i_sbd->gfs1) gfs1_writei(ip, padbuf, lindex * sizeof(uint64_t), pad_size); else gfs2_writei(ip, padbuf, lindex * sizeof(uint64_t), pad_size); free(padbuf); log_err( _("Directory Inode %llu (0x%llx) patched.\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); goto out; } /* We can only write leafs in quantities that are factors of two, since leaves are doubled, not added sequentially. So if we have a hole that's not a factor of 2, we have to break it down into separate leaf blocks that are. 
*/ while (ref_count) { refs = 1; while (refs <= ref_count) { if (refs * 2 > ref_count) break; refs *= 2; } error = write_new_leaf(ip, lindex, refs, _("replacing"), &bn); if (error) return error; new_leaf_blks++; lindex += refs; ref_count -= refs; } log_err( _("Directory Inode %llu (0x%llx) repaired.\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); out: *leaf_no = bn; return new_leaf_blks; } gfs2-utils/gfs2/fsck/metawalk.h0000664000175000017500000001433012154127655015305 0ustar andyandy#ifndef _METAWALK_H #define _METAWALK_H #define DIR_LINEAR 1 #define DIR_EXHASH 2 #include "util.h" struct metawalk_fxns; extern int check_inode_eattr(struct gfs2_inode *ip, struct metawalk_fxns *pass); extern int check_metatree(struct gfs2_inode *ip, struct metawalk_fxns *pass); extern int check_dir(struct gfs2_sbd *sdp, uint64_t block, struct metawalk_fxns *pass); extern int check_linear_dir(struct gfs2_inode *ip, struct gfs2_buffer_head *bh, struct metawalk_fxns *pass); extern int check_leaf(struct gfs2_inode *ip, int lindex, struct metawalk_fxns *pass, uint64_t *leaf_no, struct gfs2_leaf *leaf, int *ref_count); extern int remove_dentry_from_dir(struct gfs2_sbd *sdp, uint64_t dir, uint64_t dentryblock); extern int delete_block(struct gfs2_inode *ip, uint64_t block, struct gfs2_buffer_head **bh, const char *btype, void *private); extern int delete_metadata(struct gfs2_inode *ip, uint64_t block, struct gfs2_buffer_head **bh, int h, int *is_valid, int *was_duplicate, void *private); extern int delete_leaf(struct gfs2_inode *ip, uint64_t block, void *private); extern int delete_data(struct gfs2_inode *ip, uint64_t metablock, uint64_t block, void *private); extern int delete_eattr_indir(struct gfs2_inode *ip, uint64_t block, uint64_t parent, struct gfs2_buffer_head **bh, void *private); extern int delete_eattr_leaf(struct gfs2_inode *ip, uint64_t block, uint64_t parent, struct gfs2_buffer_head **bh, void *private); extern int 
delete_eattr_entry(struct gfs2_inode *ip, struct gfs2_buffer_head *leaf_bh, struct gfs2_ea_header *ea_hdr, struct gfs2_ea_header *ea_hdr_prev, void *private); extern int delete_eattr_extentry(struct gfs2_inode *ip, uint64_t *ea_data_ptr, struct gfs2_buffer_head *leaf_bh, struct gfs2_ea_header *ea_hdr, struct gfs2_ea_header *ea_hdr_prev, void *private); extern int _fsck_blockmap_set(struct gfs2_inode *ip, uint64_t bblock, const char *btype, enum gfs2_mark_block mark, int error_on_dinode, const char *caller, int line); extern int check_n_fix_bitmap(struct gfs2_sbd *sdp, uint64_t blk, int error_on_dinode, enum gfs2_mark_block new_blockmap_state); extern void reprocess_inode(struct gfs2_inode *ip, const char *desc); extern struct duptree *dupfind(uint64_t block); extern struct gfs2_inode *fsck_system_inode(struct gfs2_sbd *sdp, uint64_t block); extern int find_remove_dup(struct gfs2_inode *ip, uint64_t block, const char *btype); extern int write_new_leaf(struct gfs2_inode *dip, int start_lindex, int num_copies, const char *before_or_after, uint64_t *bn); extern int repair_leaf(struct gfs2_inode *ip, uint64_t *leaf_no, int lindex, int ref_count, const char *msg, int allow_alloc); #define is_duplicate(dblock) ((dupfind(dblock)) ? 
1 : 0) #define fsck_blockmap_set(ip, b, bt, m) \ _fsck_blockmap_set(ip, b, bt, m, 0, __FUNCTION__, __LINE__) #define fsck_blkmap_set_noino(ip, b, bt, m) \ _fsck_blockmap_set(ip, b, bt, m, 1, __FUNCTION__, __LINE__) enum meta_check_rc { meta_error = -1, meta_is_good = 0, meta_skip_further = 1, }; /* metawalk_fxns: function pointers to check various parts of the fs * * The functions should return -1 on fatal errors, 1 if the block * should be skipped, and 0 on success * * private: Data that should be passed to the fxns * check_leaf: * check_metalist: * check_data: * check_eattr_indir: * check_eattr_leaf: * check_dentry: * check_eattr_entry: * check_eattr_extentry: */ struct metawalk_fxns { void *private; int (*check_leaf_depth) (struct gfs2_inode *ip, uint64_t leaf_no, int ref_count, struct gfs2_buffer_head *lbh); int (*check_leaf) (struct gfs2_inode *ip, uint64_t block, void *private); /* parameters to the check_metalist sub-functions: ip: incore inode pointer block: block number of the metadata block to be checked bh: buffer_head to be returned h: height is_valid: returned as 1 if the metadata block is valid and should be added to the metadata list for further processing. was_duplicate: returns as 1 if the metadata block was determined to be a duplicate reference, in which case we want to skip adding it to the metadata list. 
private: Pointer to pass-specific data returns: 0 - everything is good, but there may be duplicates 1 - skip further processing */ int (*check_metalist) (struct gfs2_inode *ip, uint64_t block, struct gfs2_buffer_head **bh, int h, int *is_valid, int *was_duplicate, void *private); int (*check_data) (struct gfs2_inode *ip, uint64_t metablock, uint64_t block, void *private); int (*check_eattr_indir) (struct gfs2_inode *ip, uint64_t block, uint64_t parent, struct gfs2_buffer_head **bh, void *private); int (*check_eattr_leaf) (struct gfs2_inode *ip, uint64_t block, uint64_t parent, struct gfs2_buffer_head **bh, void *private); int (*check_dentry) (struct gfs2_inode *ip, struct gfs2_dirent *de, struct gfs2_dirent *prev, struct gfs2_buffer_head *bh, char *filename, uint32_t *count, int lindex, void *private); int (*check_eattr_entry) (struct gfs2_inode *ip, struct gfs2_buffer_head *leaf_bh, struct gfs2_ea_header *ea_hdr, struct gfs2_ea_header *ea_hdr_prev, void *private); int (*check_eattr_extentry) (struct gfs2_inode *ip, uint64_t *ea_data_ptr, struct gfs2_buffer_head *leaf_bh, struct gfs2_ea_header *ea_hdr, struct gfs2_ea_header *ea_hdr_prev, void *private); int (*finish_eattr_indir) (struct gfs2_inode *ip, int leaf_pointers, int leaf_pointer_errors, void *private); void (*big_file_msg) (struct gfs2_inode *ip, uint64_t blks_checked); int (*check_hash_tbl) (struct gfs2_inode *ip, uint64_t *tbl, unsigned hsize, void *private); int (*repair_leaf) (struct gfs2_inode *ip, uint64_t *leaf_no, int lindex, int ref_count, const char *msg, void *private); int (*undo_check_meta) (struct gfs2_inode *ip, uint64_t block, int h, void *private); int (*undo_check_data) (struct gfs2_inode *ip, uint64_t block, void *private); }; #endif /* _METAWALK_H */ gfs2-utils/gfs2/fsck/pass1.c0000664000175000017500000015200012154127655014517 0ustar andyandy/* pass1 checks inodes for format & type, duplicate blocks, & incorrect * block count. 
* * It builds up tables that contains the state of each block (free, * block in use, metadata type, etc), as well as bad blocks and * duplicate blocks. (See block_list.[ch] for more info) * */ #include #include #include #include #include #include #include #include #include #include #include #define _(String) gettext(String) #include "libgfs2.h" #include "fsck.h" #include "inode_hash.h" #include "util.h" #include "link.h" #include "metawalk.h" struct special_blocks gfs1_rindex_blks; struct block_count { uint64_t indir_count; uint64_t data_count; uint64_t ea_count; }; static int p1check_leaf(struct gfs2_inode *ip, uint64_t block, void *private); static int check_metalist(struct gfs2_inode *ip, uint64_t block, struct gfs2_buffer_head **bh, int h, int *is_valid, int *was_duplicate, void *private); static int undo_check_metalist(struct gfs2_inode *ip, uint64_t block, int h, void *private); static int check_data(struct gfs2_inode *ip, uint64_t metablock, uint64_t block, void *private); static int undo_check_data(struct gfs2_inode *ip, uint64_t block, void *private); static int check_eattr_indir(struct gfs2_inode *ip, uint64_t indirect, uint64_t parent, struct gfs2_buffer_head **bh, void *private); static int check_eattr_leaf(struct gfs2_inode *ip, uint64_t block, uint64_t parent, struct gfs2_buffer_head **bh, void *private); static int check_eattr_entries(struct gfs2_inode *ip, struct gfs2_buffer_head *leaf_bh, struct gfs2_ea_header *ea_hdr, struct gfs2_ea_header *ea_hdr_prev, void *private); static int check_extended_leaf_eattr(struct gfs2_inode *ip, uint64_t *data_ptr, struct gfs2_buffer_head *leaf_bh, struct gfs2_ea_header *ea_hdr, struct gfs2_ea_header *ea_hdr_prev, void *private); static int finish_eattr_indir(struct gfs2_inode *ip, int leaf_pointers, int leaf_pointer_errors, void *private); static int invalidate_metadata(struct gfs2_inode *ip, uint64_t block, struct gfs2_buffer_head **bh, int h, int *is_valid, int *was_duplicate, void *private); static int 
invalidate_leaf(struct gfs2_inode *ip, uint64_t block, void *private); static int invalidate_data(struct gfs2_inode *ip, uint64_t metablock, uint64_t block, void *private); static int invalidate_eattr_indir(struct gfs2_inode *ip, uint64_t block, uint64_t parent, struct gfs2_buffer_head **bh, void *private); static int invalidate_eattr_leaf(struct gfs2_inode *ip, uint64_t block, uint64_t parent, struct gfs2_buffer_head **bh, void *private); static int handle_ip(struct gfs2_sbd *sdp, struct gfs2_inode *ip); static int pass1_repair_leaf(struct gfs2_inode *ip, uint64_t *leaf_no, int lindex, int ref_count, const char *msg, void *private) { repair_leaf(ip, leaf_no, lindex, ref_count, msg, 0); return 0; } struct metawalk_fxns pass1_fxns = { .private = NULL, .check_leaf = p1check_leaf, .check_metalist = check_metalist, .check_data = check_data, .check_eattr_indir = check_eattr_indir, .check_eattr_leaf = check_eattr_leaf, .check_dentry = NULL, .check_eattr_entry = check_eattr_entries, .check_eattr_extentry = check_extended_leaf_eattr, .finish_eattr_indir = finish_eattr_indir, .big_file_msg = big_file_comfort, .repair_leaf = pass1_repair_leaf, .undo_check_meta = undo_check_metalist, .undo_check_data = undo_check_data, }; struct metawalk_fxns invalidate_fxns = { .private = NULL, .check_metalist = invalidate_metadata, .check_data = invalidate_data, .check_leaf = invalidate_leaf, .check_eattr_indir = invalidate_eattr_indir, .check_eattr_leaf = invalidate_eattr_leaf, }; /* * resuscitate_metalist - make sure a system directory entry's metadata blocks * are marked "in use" in the bitmap. * * This function makes sure metadata blocks for system and root directories are * marked "in use" by the bitmap. You don't want root's indirect blocks * deleted, do you? Or worse, reused for lost+found. 
*/
static int resuscitate_metalist(struct gfs2_inode *ip, uint64_t block,
				struct gfs2_buffer_head **bh, int h,
				int *is_valid, int *was_duplicate,
				void *private)
{
	struct block_count *counts = (struct block_count *)private;
	struct gfs2_sbd *sbd = ip->i_sbd;

	/* This callback never hands a buffer back and never reports a
	   duplicate; only the validity flag depends on the block. */
	*bh = NULL;
	*was_duplicate = 0;
	*is_valid = 1;

	if (valid_block(sbd, block)) {
		/* Blocks that are system dinodes only get a blockmap entry;
		   anything else is also forced to "in use" in the bitmap. */
		if (fsck_system_inode(sbd, block))
			fsck_blockmap_set(ip, block, _("system file"),
					  gfs2_indir_blk);
		else
			check_n_fix_bitmap(sbd, block, 0, gfs2_indir_blk);
		counts->indir_count++;
	} else {
		/* blk outside of FS: flag the owning dinode itself as bad */
		fsck_blockmap_set(ip, ip->i_di.di_num.no_addr,
				  _("itself"), gfs2_bad_block);
		log_err( _("Bad indirect block pointer (invalid or out of "
			   "range) found in system inode %lld (0x%llx).\n"),
			 (unsigned long long)ip->i_di.di_num.no_addr,
			 (unsigned long long)ip->i_di.di_num.no_addr);
		*is_valid = 0;
	}
	return meta_is_good;
}

/*
 * resuscitate_dentry - make sure a system directory entry is alive
 *
 * This function makes sure directory entries in system directories are
 * kept alive.  You don't want journal0 deleted from jindex, do you?
*/ static int resuscitate_dentry(struct gfs2_inode *ip, struct gfs2_dirent *dent, struct gfs2_dirent *prev_de, struct gfs2_buffer_head *bh, char *filename, uint32_t *count, int lindex, void *priv) { struct gfs2_sbd *sdp = ip->i_sbd; struct gfs2_dirent dentry, *de; char tmp_name[PATH_MAX]; uint64_t block; enum gfs2_mark_block dinode_type; memset(&dentry, 0, sizeof(struct gfs2_dirent)); gfs2_dirent_in(&dentry, (char *)dent); de = &dentry; block = de->de_inum.no_addr; /* Start of checks */ memset(tmp_name, 0, sizeof(tmp_name)); if (de->de_name_len < sizeof(tmp_name)) strncpy(tmp_name, filename, de->de_name_len); else strncpy(tmp_name, filename, sizeof(tmp_name) - 1); if (!valid_block(sdp, block)) { log_err( _("Block # referenced by system directory entry %s " "in inode %lld (0x%llx) is invalid or out of range;" " ignored.\n"), tmp_name, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); return 0; } if (block == sdp->md.jiinode->i_di.di_num.no_addr) dinode_type = gfs2_inode_dir; else if (!sdp->gfs1 && (block == sdp->md.pinode->i_di.di_num.no_addr || block == sdp->master_dir->i_di.di_num.no_addr)) dinode_type = gfs2_inode_dir; else dinode_type = gfs2_inode_file; /* If this is a system dinode, we'll handle it later in check_system_inodes. If not, it'll be handled by pass1 but since it's in a system directory we need to make sure it's represented in the rgrp bitmap. */ if (fsck_system_inode(sdp, block)) fsck_blockmap_set(ip, block, _("system file"), dinode_type); else check_n_fix_bitmap(sdp, block, 0, dinode_type); /* Return the number of leaf entries so metawalk doesn't flag this leaf as having none. 
*/ *count = be16_to_cpu(((struct gfs2_leaf *)bh->b_data)->lf_entries); return 0; } struct metawalk_fxns sysdir_fxns = { .private = NULL, .check_metalist = resuscitate_metalist, .check_dentry = resuscitate_dentry, }; static int p1check_leaf(struct gfs2_inode *ip, uint64_t block, void *private) { struct block_count *bc = (struct block_count *) private; uint8_t q; /* Note if we've gotten this far, the block has already passed the check in metawalk: gfs2_check_meta(lbh, GFS2_METATYPE_LF). So we know it's a leaf block. */ bc->indir_count++; q = block_type(block); if (q != gfs2_block_free) { log_err( _("Found duplicate block #%llu (0x%llx) referenced " "as a directory leaf in dinode " "%llu (0x%llx) - was marked %d (%s)\n"), (unsigned long long)block, (unsigned long long)block, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, q, block_type_string(q)); add_duplicate_ref(ip, block, ref_as_meta, 0, INODE_VALID); if (q == gfs2_leaf_blk) /* If the previous reference also saw this as a leaf, it was already checked, so don't check again. */ return -EEXIST; } fsck_blockmap_set(ip, block, _("directory leaf"), gfs2_leaf_blk); return 0; } static int check_metalist(struct gfs2_inode *ip, uint64_t block, struct gfs2_buffer_head **bh, int h, int *is_valid, int *was_duplicate, void *private) { uint8_t q; int iblk_type; struct gfs2_buffer_head *nbh; struct block_count *bc = (struct block_count *)private; const char *blktypedesc; *bh = NULL; *was_duplicate = 0; *is_valid = 0; if (!valid_block(ip->i_sbd, block)) { /* blk outside of FS */ /* The bad dinode should be invalidated later due to "unrecoverable" errors. The inode itself should be set "free" and removed from the inodetree by undo_check_metalist. 
*/ fsck_blockmap_set(ip, ip->i_di.di_num.no_addr, _("bad block referencing"), gfs2_bad_block); log_debug( _("Bad indirect block (invalid/out of range) " "found in inode %lld (0x%llx).\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); return meta_skip_further; } if (is_dir(&ip->i_di, ip->i_sbd->gfs1) && h == ip->i_di.di_height) { iblk_type = GFS2_METATYPE_JD; blktypedesc = _("a directory hash table block"); } else { iblk_type = GFS2_METATYPE_IN; blktypedesc = _("a journaled data block"); } q = block_type(block); if (q != gfs2_block_free) { log_err( _("Found duplicate block #%llu (0x%llx) referenced " "as metadata in indirect block for dinode " "%llu (0x%llx) - was marked %d (%s)\n"), (unsigned long long)block, (unsigned long long)block, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, q, block_type_string(q)); *was_duplicate = 1; } nbh = bread(ip->i_sbd, block); *is_valid = (gfs2_check_meta(nbh, iblk_type) == 0); if (!(*is_valid)) { log_err( _("Inode %lld (0x%llx) has a bad indirect block " "pointer %lld (0x%llx) (points to something " "that is not %s).\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)block, (unsigned long long)block, blktypedesc); brelse(nbh); return meta_skip_further; } bc->indir_count++; if (*was_duplicate) { add_duplicate_ref(ip, block, ref_as_meta, 0, *is_valid ? INODE_VALID : INODE_INVALID); brelse(nbh); } else { *bh = nbh; fsck_blockmap_set(ip, block, _("indirect"), gfs2_indir_blk); } if (*is_valid) return meta_is_good; return meta_skip_further; } /* undo_reference - undo previously processed data or metadata * We've treated the metadata for this dinode as good so far, but not we * realize it's bad. So we need to undo what we've done. * * Returns: 0 - We need to process the block as metadata. In other words, * we need to undo any blocks it refers to. 
* 1 - We can't process the block as metadata. */ static int undo_reference(struct gfs2_inode *ip, uint64_t block, int meta, void *private) { struct block_count *bc = (struct block_count *)private; struct duptree *dt; struct inode_with_dups *id; int old_bitmap_state = 0; struct rgrp_tree *rgd; if (!valid_block(ip->i_sbd, block)) { /* blk outside of FS */ fsck_blockmap_set(ip, ip->i_di.di_num.no_addr, _("bad block referencing"), gfs2_block_free); return 1; } if (meta) bc->indir_count--; dt = dupfind(block); if (dt) { /* remove all duplicate reference structures from this inode */ do { id = find_dup_ref_inode(dt, ip); if (!id) break; dup_listent_delete(dt, id); } while (id); if (dt->refs) { log_err(_("Block %llu (0x%llx) is still referenced " "from another inode; not freeing.\n"), (unsigned long long)block, (unsigned long long)block); return 1; } } if (!meta) { rgd = gfs2_blk2rgrpd(ip->i_sbd, block); old_bitmap_state = lgfs2_get_bitmap(ip->i_sbd, block, rgd); if (old_bitmap_state == GFS2_BLKST_DINODE) return -1; } fsck_blockmap_set(ip, block, meta ? _("bad indirect") : _("referenced data"), gfs2_block_free); return 0; } static int undo_check_metalist(struct gfs2_inode *ip, uint64_t block, int h, void *private) { return undo_reference(ip, block, 1, private); } static int undo_check_data(struct gfs2_inode *ip, uint64_t block, void *private) { return undo_reference(ip, block, 0, private); } /* blockmap_set_as_data - set block as 'data' in the blockmap, if not dinode * * This function tries to set a block that's referenced as data as 'data' * in the fsck blockmap. But if that block is marked as 'dinode' in the * rgrp bitmap, it does additional checks to see if it looks like a dinode. * Note that previous checks were done for duplicate references, so this * is checking for dinodes that we haven't processed yet. 
*/ static int blockmap_set_as_data(struct gfs2_inode *ip, uint64_t block) { int error; struct gfs2_buffer_head *bh; struct gfs2_dinode *di; error = fsck_blkmap_set_noino(ip, block, _("data"), gfs2_block_used); if (!error) return 0; error = 0; /* The bitmap says it's a dinode, but a block reference begs to differ. So which is it? */ bh = bread(ip->i_sbd, block); if (gfs2_check_meta(bh, GFS2_METATYPE_DI) != 0) goto out; /* The meta header agrees it's a dinode. But it might be data in disguise, so do some extra checks. */ di = (struct gfs2_dinode *)bh->b_data; if (be64_to_cpu(di->di_num.no_addr) != block) goto out; log_err(_("Inode %lld (0x%llx) has a reference to block %lld (0x%llx) " "as a data block, but it appears to be a dinode we " "haven't checked yet.\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)block, (unsigned long long)block); error = -1; out: if (!error) fsck_blockmap_set(ip, block, _("data"), gfs2_block_used); brelse(bh); return error; } static int check_data(struct gfs2_inode *ip, uint64_t metablock, uint64_t block, void *private) { uint8_t q; struct block_count *bc = (struct block_count *) private; if (!valid_block(ip->i_sbd, block)) { log_err( _("inode %lld (0x%llx) has a bad data block pointer " "%lld (0x%llx) (invalid or out of range) "), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)block, (unsigned long long)block); if (metablock == ip->i_di.di_num.no_addr) log_err("\n"); else log_err(_("from metadata block %llu (0x%llx)\n"), (unsigned long long)metablock, (unsigned long long)metablock); /* Mark the owner of this block with the bad_block * designator so we know to check it for out of range * blocks later */ fsck_blockmap_set(ip, ip->i_di.di_num.no_addr, _("bad (out of range) data"), gfs2_bad_block); return -1; } bc->data_count++; /* keep the count sane anyway */ q = block_type(block); if (q != gfs2_block_free) { 
log_err( _("Found duplicate %s block %llu (0x%llx) " "referenced as data by dinode %llu (0x%llx) "), block_type_string(q), (unsigned long long)block, (unsigned long long)block, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); if (metablock == ip->i_di.di_num.no_addr) log_err("\n"); else log_err(_("from metadata block %llu (0x%llx)\n"), (unsigned long long)metablock, (unsigned long long)metablock); if (q >= gfs2_indir_blk && q <= gfs2_jdata) { log_info(_("The block was processed earlier as valid " "metadata, so it can't possibly be " "data.\n")); /* We still need to add a duplicate record here because when check_metatree tries to delete the inode, we can't have the "undo" functions freeing the block out from other the original referencing inode. */ add_duplicate_ref(ip, block, ref_as_data, 0, INODE_VALID); return 1; } if (q != gfs2_meta_inval) { log_info( _("Seems to be a normal duplicate; I'll " "sort it out in pass1b.\n")); add_duplicate_ref(ip, block, ref_as_data, 0, INODE_VALID); /* This inode references the block as data. So if this all is validated, we want to keep this count. */ return 0; } log_info( _("The block was invalid as metadata but might be " "okay as data. I'll sort it out in pass1b.\n")); add_duplicate_ref(ip, block, ref_as_data, 0, INODE_VALID); return 0; } /* In gfs1, rgrp indirect blocks are marked in the bitmap as "meta". In gfs2, "meta" is only for dinodes. So here we dummy up the blocks so that the bitmap isn't changed improperly. 
*/ if (ip->i_sbd->gfs1 && ip == ip->i_sbd->md.riinode) { log_info(_("Block %lld (0x%llx) is a GFS1 rindex block\n"), (unsigned long long)block, (unsigned long long)block); gfs2_special_set(&gfs1_rindex_blks, block); fsck_blockmap_set(ip, block, _("rgrp"), gfs2_indir_blk); /*gfs2_meta_rgrp);*/ } else if (ip->i_sbd->gfs1 && ip->i_di.di_flags & GFS2_DIF_JDATA) { log_info(_("Block %lld (0x%llx) is a GFS1 journaled data " "block\n"), (unsigned long long)block, (unsigned long long)block); fsck_blockmap_set(ip, block, _("jdata"), gfs2_jdata); } else return blockmap_set_as_data(ip, block); return 0; } static int remove_inode_eattr(struct gfs2_inode *ip, struct block_count *bc) { undo_reference(ip, ip->i_di.di_eattr, 0, bc); ip->i_di.di_eattr = 0; bc->ea_count = 0; ip->i_di.di_blocks = 1 + bc->indir_count + bc->data_count; ip->i_di.di_flags &= ~GFS2_DIF_EA_INDIRECT; bmodified(ip->i_bh); return 0; } static int ask_remove_inode_eattr(struct gfs2_inode *ip, struct block_count *bc) { if (ip->i_di.di_eattr == 0) return 0; /* eattr was removed prior to this call */ log_err( _("Inode %lld (0x%llx) has unrecoverable Extended Attribute " "errors.\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); if (query( _("Clear all Extended Attributes from the inode? (y/n) "))){ if (!remove_inode_eattr(ip, bc)) log_err( _("Extended attributes were removed.\n")); else log_err( _("Unable to remove inode eattr pointer; " "the error remains.\n")); } else { log_err( _("Extended attributes were not removed.\n")); } return 0; } /* clear_eas - clear the extended attributes for an inode * * @ip - in core inode pointer * @bc - pointer to a block count structure * block - the block that had the problem * duplicate - if this is a duplicate block, don't set it "free" * emsg - what to tell the user about the eas being checked * Returns: 1 if the EA is fixed, else 0 if it was not fixed. 
*/ static int clear_eas(struct gfs2_inode *ip, struct block_count *bc, uint64_t block, int duplicate, const char *emsg) { log_err( _("Inode #%llu (0x%llx): %s"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, emsg); log_err( _(" at block #%lld (0x%llx).\n"), (unsigned long long)block, (unsigned long long)block); if (query( _("Clear the bad Extended Attribute? (y/n) "))) { if (block == ip->i_di.di_eattr) { remove_inode_eattr(ip, bc); log_err( _("The bad extended attribute was " "removed.\n")); } else if (!duplicate) { delete_block(ip, block, NULL, _("bad extended attribute"), NULL); } return 1; } else { log_err( _("The bad Extended Attribute was not fixed.\n")); bc->ea_count++; return 0; } } static int check_eattr_indir(struct gfs2_inode *ip, uint64_t indirect, uint64_t parent, struct gfs2_buffer_head **bh, void *private) { struct gfs2_sbd *sdp = ip->i_sbd; int ret = 0; uint8_t q; struct block_count *bc = (struct block_count *) private; /* This inode contains an eattr - it may be invalid, but the * eattr attributes points to a non-zero block */ if (!valid_block(sdp, indirect)) { /* Doesn't help to mark this here - this gets checked * in pass1c */ return 1; } q = block_type(indirect); /* Special duplicate processing: If we have an EA block, check if it really is an EA. If it is, let duplicate handling sort it out. If it isn't, clear it but don't count it as a duplicate. */ *bh = bread(sdp, indirect); if (gfs2_check_meta(*bh, GFS2_METATYPE_IN)) { if (q != gfs2_block_free) { /* Duplicate? */ add_duplicate_ref(ip, indirect, ref_as_ea, 0, INODE_VALID); if (!clear_eas(ip, bc, indirect, 1, _("Bad indirect Extended Attribute " "duplicate found"))) bc->ea_count++; return 1; } clear_eas(ip, bc, indirect, 0, _("Extended Attribute indirect block has incorrect " "type")); return 1; } if (q != gfs2_block_free) { /* Duplicate? 
*/ log_err( _("Inode #%llu (0x%llx): Duplicate Extended " "Attribute indirect block found at #%llu " "(0x%llx).\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)indirect, (unsigned long long)indirect); add_duplicate_ref(ip, indirect, ref_as_ea, 0, INODE_VALID); bc->ea_count++; ret = 1; } else { fsck_blockmap_set(ip, indirect, _("indirect Extended Attribute"), gfs2_indir_blk); bc->ea_count++; } return ret; } static int finish_eattr_indir(struct gfs2_inode *ip, int leaf_pointers, int leaf_pointer_errors, void *private) { struct block_count *bc = (struct block_count *) private; osi_list_t *head; struct special_blocks *b = NULL; if (leaf_pointer_errors == leaf_pointers) /* All eas were bad */ return ask_remove_inode_eattr(ip, bc); log_debug( _("Marking inode #%llu (0x%llx) with extended " "attribute block\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); /* Mark the inode as having an eattr in the block map so pass1c can check it. We may have previously added this inode to the eattr_blocks list and if we did, it would be the first one on the list. So check that one only (to save time) and if that one matches, no need to add it again. */ if (!osi_list_empty(&ip->i_sbd->eattr_blocks.list)) { head = &ip->i_sbd->eattr_blocks.list; b = osi_list_entry(head->next, struct special_blocks, list); } if (!b || b->block != ip->i_di.di_num.no_addr) gfs2_special_add(&ip->i_sbd->eattr_blocks, ip->i_di.di_num.no_addr); if (!leaf_pointer_errors) return 0; log_err( _("Inode %lld (0x%llx) has recoverable indirect " "Extended Attribute errors.\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); if (query( _("Okay to fix the block count for the inode? 
(y/n) "))) { ip->i_di.di_blocks = 1 + bc->indir_count + bc->data_count + bc->ea_count; bmodified(ip->i_bh); log_err(_("Block count fixed: 1+%lld+%lld+%lld = %lld.\n"), (unsigned long long)bc->indir_count, (unsigned long long)bc->data_count, (unsigned long long)bc->ea_count, (unsigned long long)ip->i_di.di_blocks); return 1; } log_err( _("Block count not fixed.\n")); return 1; } /* check_ealeaf_block * checks an extended attribute (not directory) leaf block */ static int check_ealeaf_block(struct gfs2_inode *ip, uint64_t block, int btype, struct gfs2_buffer_head **bh, void *private) { struct gfs2_buffer_head *leaf_bh = NULL; struct gfs2_sbd *sdp = ip->i_sbd; uint8_t q; struct block_count *bc = (struct block_count *) private; q = block_type(block); /* Special duplicate processing: If we have an EA block, check if it really is an EA. If it is, let duplicate handling sort it out. If it isn't, clear it but don't count it as a duplicate. */ leaf_bh = bread(sdp, block); if (gfs2_check_meta(leaf_bh, btype)) { if (q != gfs2_block_free) { /* Duplicate? */ add_duplicate_ref(ip, block, ref_as_ea, 0, INODE_VALID); clear_eas(ip, bc, block, 1, _("Bad Extended Attribute duplicate found")); } else { clear_eas(ip, bc, block, 0, _("Extended Attribute leaf block " "has incorrect type")); } brelse(leaf_bh); return 1; } if (q != gfs2_block_free) { /* Duplicate? */ log_debug( _("Duplicate block found at #%lld (0x%llx).\n"), (unsigned long long)block, (unsigned long long)block); add_duplicate_ref(ip, block, ref_as_data, 0, INODE_VALID); bc->ea_count++; brelse(leaf_bh); return 1; } if (ip->i_di.di_eattr == 0) { /* Can only get in here if there were unrecoverable ea errors that caused clear_eas to be called. What we need to do here is remove the subsequent ea blocks. */ clear_eas(ip, bc, block, 0, _("Extended Attribute block removed due to " "previous errors.\n")); brelse(leaf_bh); return 1; } /* Point of confusion: We've got to set the ea block itself to gfs2_meta_eattr here. 
Elsewhere we mark the inode with gfs2_eattr_block meaning it contains an eattr for pass1c. */ fsck_blockmap_set(ip, block, _("Extended Attribute"), gfs2_meta_eattr); bc->ea_count++; *bh = leaf_bh; return 0; } /** * check_extended_leaf_eattr * @ip * @el_blk: block number of the extended leaf * * An EA leaf block can contain EA's with pointers to blocks * where the data for that EA is kept. Those blocks still * have the gfs2 meta header of type GFS2_METATYPE_EA * * Returns: 0 if correct[able], -1 if removal is needed */ static int check_extended_leaf_eattr(struct gfs2_inode *ip, uint64_t *data_ptr, struct gfs2_buffer_head *leaf_bh, struct gfs2_ea_header *ea_hdr, struct gfs2_ea_header *ea_hdr_prev, void *private) { uint64_t el_blk = be64_to_cpu(*data_ptr); struct gfs2_sbd *sdp = ip->i_sbd; struct gfs2_buffer_head *bh = NULL; int error; if (!valid_block(sdp, el_blk)) { log_err( _("Inode #%llu (0x%llx): Extended Attribute block " "%llu (0x%llx) has an extended leaf block #%llu " "(0x%llx) that is invalid or out of range.\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_eattr, (unsigned long long)ip->i_di.di_eattr, (unsigned long long)el_blk, (unsigned long long)el_blk); fsck_blockmap_set(ip, ip->i_di.di_eattr, _("bad (out of range) Extended Attribute "), gfs2_bad_block); return 1; } error = check_ealeaf_block(ip, el_blk, GFS2_METATYPE_ED, &bh, private); if (bh) brelse(bh); return error; } static int check_eattr_leaf(struct gfs2_inode *ip, uint64_t block, uint64_t parent, struct gfs2_buffer_head **bh, void *private) { struct gfs2_sbd *sdp = ip->i_sbd; osi_list_t *head; struct special_blocks *b = NULL; /* This inode contains an eattr - it may be invalid, but the * eattr attributes points to a non-zero block. * Clarification: If we're here we're checking a leaf block, and the * source dinode needs to be marked as having extended attributes. 
* That instructs pass1c to check the contents of the ea blocks. */ log_debug( _("Setting inode %lld (0x%llx) as having eattr " "block(s) attached.\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); if (!osi_list_empty(&ip->i_sbd->eattr_blocks.list)) { head = &ip->i_sbd->eattr_blocks.list; b = osi_list_entry(head->next, struct special_blocks, list); } if (!b || b->block != ip->i_di.di_num.no_addr) gfs2_special_add(&sdp->eattr_blocks, ip->i_di.di_num.no_addr); if (!valid_block(sdp, block)) { log_warn( _("Inode #%llu (0x%llx): Extended Attribute leaf " "block #%llu (0x%llx) is invalid or out of " "range.\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)block, (unsigned long long)block); fsck_blockmap_set(ip, ip->i_di.di_eattr, _("bad (out of range) Extended " "Attribute leaf"), gfs2_bad_block); return 1; } return check_ealeaf_block(ip, block, GFS2_METATYPE_EA, bh, private); } static int check_eattr_entries(struct gfs2_inode *ip, struct gfs2_buffer_head *leaf_bh, struct gfs2_ea_header *ea_hdr, struct gfs2_ea_header *ea_hdr_prev, void *private) { struct gfs2_sbd *sdp = ip->i_sbd; char ea_name[256]; if (!ea_hdr->ea_name_len){ /* Skip this entry for now */ return 1; } memset(ea_name, 0, sizeof(ea_name)); strncpy(ea_name, (char *)ea_hdr + sizeof(struct gfs2_ea_header), ea_hdr->ea_name_len); if (!GFS2_EATYPE_VALID(ea_hdr->ea_type) && ((ea_hdr_prev) || (!ea_hdr_prev && ea_hdr->ea_type))){ /* Skip invalid entry */ return 1; } if (ea_hdr->ea_num_ptrs){ uint32_t avail_size; int max_ptrs; avail_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); max_ptrs = (be32_to_cpu(ea_hdr->ea_data_len)+avail_size-1)/avail_size; if (max_ptrs > ea_hdr->ea_num_ptrs) { return 1; } else { log_debug( _(" Pointers Required: %d\n Pointers Reported: %d\n"), max_ptrs, ea_hdr->ea_num_ptrs); } } return 0; } /** * mark_block_invalid - mark blocks associated with an inode as invalid * 
unless the block is a duplicate. * * An "invalid" block is now considered free in the bitmap, and pass2 will * delete any invalid blocks. This is nearly identical to function * delete_block_if_notdup. */ static int mark_block_invalid(struct gfs2_inode *ip, uint64_t block, enum dup_ref_type reftype, const char *btype, int *is_valid, int *was_duplicate) { uint8_t q; /* If the block isn't valid, we obviously can't invalidate it. * However, if we return an error, invalidating will stop, and * we want it to continue to invalidate the valid blocks. If we * don't do this, block references that follow that are also * referenced elsewhere (duplicates) won't be flagged as such, * and as a result, they'll be freed when this dinode is deleted, * despite being used by another dinode as a valid block. */ if (is_valid) *is_valid = 1; if (was_duplicate) *was_duplicate = 0; if (!valid_block(ip->i_sbd, block)) { if (is_valid) *is_valid = 0; return meta_is_good; } q = block_type(block); if (q != gfs2_block_free) { if (was_duplicate) *was_duplicate = 1; add_duplicate_ref(ip, block, reftype, 0, INODE_INVALID); log_info( _("%s block %lld (0x%llx), part of inode " "%lld (0x%llx), was previously referenced so " "the invalid reference is ignored.\n"), btype, (unsigned long long)block, (unsigned long long)block, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); return meta_is_good; } fsck_blockmap_set(ip, block, btype, gfs2_meta_inval); return meta_is_good; } static int invalidate_metadata(struct gfs2_inode *ip, uint64_t block, struct gfs2_buffer_head **bh, int h, int *is_valid, int *was_duplicate, void *private) { *is_valid = 1; *was_duplicate = 0; return mark_block_invalid(ip, block, ref_as_meta, _("metadata"), is_valid, was_duplicate); } static int invalidate_leaf(struct gfs2_inode *ip, uint64_t block, void *private) { return mark_block_invalid(ip, block, ref_as_meta, _("leaf"), NULL, NULL); } static int invalidate_data(struct gfs2_inode *ip, 
uint64_t metablock, uint64_t block, void *private) { return mark_block_invalid(ip, block, ref_as_data, _("data"), NULL, NULL); } static int invalidate_eattr_indir(struct gfs2_inode *ip, uint64_t block, uint64_t parent, struct gfs2_buffer_head **bh, void *private) { return mark_block_invalid(ip, block, ref_as_ea, _("indirect extended attribute"), NULL, NULL); } static int invalidate_eattr_leaf(struct gfs2_inode *ip, uint64_t block, uint64_t parent, struct gfs2_buffer_head **bh, void *private) { return mark_block_invalid(ip, block, ref_as_ea, _("extended attribute"), NULL, NULL); } /** * Check for massive amounts of pointer corruption. If the block has * lots of out-of-range pointers, we can't trust any of the pointers. * For example, a stray pointer with a value of 0x1d might be * corruption/nonsense, and if so, we don't want to delete an * important file (like master or the root directory) because of it. * We need to check for a large number of bad pointers BEFORE we start * messing with them because we don't want to mark a block as a * duplicate (for example) until we know if the pointers in general can * be trusted. Thus it needs to be in a separate loop. 
* Returns: 0 if good range, otherwise != 0 */ enum b_types { btype_meta, btype_leaf, btype_data, btype_ieattr, btype_eattr}; const char *btypes[5] = { "metadata", "leaf", "data", "indirect extended attribute", "extended attribute" }; static int rangecheck_block(struct gfs2_inode *ip, uint64_t block, struct gfs2_buffer_head **bh, enum b_types btype, void *private) { long *bad_pointers = (long *)private; uint8_t q; if (!valid_block(ip->i_sbd, block)) { (*bad_pointers)++; log_info( _("Bad %s block pointer (invalid or out of range " "#%ld) found in inode %lld (0x%llx).\n"), btypes[btype], *bad_pointers, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); if ((*bad_pointers) <= BAD_POINTER_TOLERANCE) return meta_is_good; else return meta_error; /* Exits check_metatree quicker */ } /* See how many duplicate blocks it has */ q = block_type(block); if (q != gfs2_block_free) { (*bad_pointers)++; log_info( _("Duplicated %s block pointer (violation %ld, block" " %lld (0x%llx)) found in inode %lld (0x%llx).\n"), btypes[btype], *bad_pointers, (unsigned long long)block, (unsigned long long)block, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); if ((*bad_pointers) <= BAD_POINTER_TOLERANCE) return meta_is_good; else return meta_error; /* Exits check_metatree quicker */ } return meta_is_good; } static int rangecheck_metadata(struct gfs2_inode *ip, uint64_t block, struct gfs2_buffer_head **bh, int h, int *is_valid, int *was_duplicate, void *private) { *is_valid = 1; *was_duplicate = 0; return rangecheck_block(ip, block, bh, btype_meta, private); } static int rangecheck_leaf(struct gfs2_inode *ip, uint64_t block, void *private) { return rangecheck_block(ip, block, NULL, btype_leaf, private); } static int rangecheck_data(struct gfs2_inode *ip, uint64_t metablock, uint64_t block, void *private) { return rangecheck_block(ip, block, NULL, btype_data, private); } static int rangecheck_eattr_indir(struct 
gfs2_inode *ip, uint64_t block, uint64_t parent, struct gfs2_buffer_head **bh, void *private) { return rangecheck_block(ip, block, NULL, btype_ieattr, private); } static int rangecheck_eattr_leaf(struct gfs2_inode *ip, uint64_t block, uint64_t parent, struct gfs2_buffer_head **bh, void *private) { return rangecheck_block(ip, block, NULL, btype_eattr, private); } struct metawalk_fxns rangecheck_fxns = { .private = NULL, .check_metalist = rangecheck_metadata, .check_data = rangecheck_data, .check_leaf = rangecheck_leaf, .check_eattr_indir = rangecheck_eattr_indir, .check_eattr_leaf = rangecheck_eattr_leaf, }; /* * handle_ip - process an incore structure representing a dinode. */ static int handle_ip(struct gfs2_sbd *sdp, struct gfs2_inode *ip) { int error; struct block_count bc = {0}; long bad_pointers; uint64_t lf_blks = 0; bad_pointers = 0L; /* First, check the metadata for massive amounts of pointer corruption. Such corruption can only lead us to ruin trying to clean it up, so it's better to check it up front and delete the inode if there is corruption. */ rangecheck_fxns.private = &bad_pointers; error = check_metatree(ip, &rangecheck_fxns); if (bad_pointers > BAD_POINTER_TOLERANCE) { log_err( _("Error: inode %llu (0x%llx) has more than " "%d bad pointers.\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, BAD_POINTER_TOLERANCE); fsck_blockmap_set(ip, ip->i_di.di_num.no_addr, _("badly corrupt"), gfs2_block_free); return 0; } error = set_ip_blockmap(ip, 1); if (error == -EINVAL) { /* We found a dinode that has an invalid mode. At this point set_ip_blockmap returned an error, which means it never got inserted into the inode tree. Since we haven't even processed its metadata with pass1_fxns, none of its metadata will be flagged as metadata or data blocks yet. Therefore, we don't need to invalidate anything. 
*/ fsck_blockmap_set(ip, ip->i_di.di_num.no_addr, _("invalid mode"), gfs2_block_free); return 0; } else if (error) goto bad_dinode; if (set_di_nlink(ip)) goto bad_dinode; if (lf_dip) lf_blks = lf_dip->i_di.di_blocks; pass1_fxns.private = &bc; error = check_metatree(ip, &pass1_fxns); /* Pass1 may have added some blocks to lost+found by virtue of leafs that were misplaced. If it did, we need to reprocess lost+found to correctly account for its blocks. */ if (lf_dip && lf_dip->i_di.di_blocks != lf_blks) reprocess_inode(lf_dip, "lost+found"); /* We there was an error, we return 0 because we want fsck to continue and analyze the other dinodes as well. */ if (fsck_abort) return 0; if (!error) { error = check_inode_eattr(ip, &pass1_fxns); if (error && !(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT)) ask_remove_inode_eattr(ip, &bc); } if (ip->i_di.di_blocks != (1 + bc.indir_count + bc.data_count + bc.ea_count)) { log_err( _("Inode #%llu (0x%llx): Ondisk block count (%llu" ") does not match what fsck found (%llu)\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_blocks, (unsigned long long)1 + bc.indir_count + bc.data_count + bc.ea_count); log_info( _("inode has: %lld, but fsck counts: Dinode:1 + " "indir:%lld + data: %lld + ea: %lld\n"), (unsigned long long)ip->i_di.di_blocks, (unsigned long long)bc.indir_count, (unsigned long long)bc.data_count, (unsigned long long)bc.ea_count); if (query( _("Fix ondisk block count? 
(y/n) "))) { ip->i_di.di_blocks = 1 + bc.indir_count + bc.data_count + bc.ea_count; bmodified(ip->i_bh); log_err( _("Block count for #%llu (0x%llx) fixed\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); } else log_err( _("Bad block count for #%llu (0x%llx" ") not fixed\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); } return 0; bad_dinode: stack; return -1; } /* * handle_di - This is now a wrapper function that takes a gfs2_buffer_head * and calls handle_ip, which takes an in-code dinode structure. */ static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh) { int error = 0; uint64_t block = bh->b_blocknr; struct gfs2_inode *ip; ip = fsck_inode_get(sdp, bh); if (ip->i_di.di_num.no_addr != block) { log_err( _("Inode #%llu (0x%llx): Bad inode address found: %llu " "(0x%llx)\n"), (unsigned long long)block, (unsigned long long)block, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); if (query( _("Fix address in inode at block #%llu" " (0x%llx)? (y/n) "), (unsigned long long)block, (unsigned long long)block)) { ip->i_di.di_num.no_addr = ip->i_di.di_num.no_formal_ino = block; bmodified(ip->i_bh); } else log_err( _("Address in inode at block #%llu" " (0x%llx) not fixed\n"), (unsigned long long)block, (unsigned long long)block); } if (sdp->gfs1 && ip->i_di.di_num.no_formal_ino != block) { log_err( _("Inode #%llu (0x%llx): GFS1 formal inode number " "mismatch: was %llu (0x%llx)\n"), (unsigned long long)block, (unsigned long long)block, (unsigned long long)ip->i_di.di_num.no_formal_ino, (unsigned long long)ip->i_di.di_num.no_formal_ino); if (query( _("Fix formal inode number in inode #%llu" " (0x%llx)? 
(y/n) "), (unsigned long long)block, (unsigned long long)block)) { ip->i_di.di_num.no_formal_ino = block; bmodified(ip->i_bh); } else log_err( _("Inode number in inode at block #%lld " "(0x%llx) not fixed\n"), (unsigned long long)block, (unsigned long long)block); } error = handle_ip(sdp, ip); fsck_inode_put(&ip); return error; } /* Check system inode and verify it's marked "in use" in the bitmap: */ /* Should work for all system inodes: root, master, jindex, per_node, etc. */ /* We have to pass the sysinode as ** because the pointer may change out from under the reference by way of the builder() function. */ static int check_system_inode(struct gfs2_sbd *sdp, struct gfs2_inode **sysinode, const char *filename, int builder(struct gfs2_sbd *sdp), enum gfs2_mark_block mark) { uint64_t iblock = 0; struct dir_status ds = {0}; int error, err = 0; log_info( _("Checking system inode '%s'\n"), filename); if (*sysinode) { /* Read in the system inode, look at its dentries, and start * reading through them */ iblock = (*sysinode)->i_di.di_num.no_addr; log_info( _("System inode for '%s' is located at block %llu" " (0x%llx)\n"), filename, (unsigned long long)iblock, (unsigned long long)iblock); if (gfs2_check_meta((*sysinode)->i_bh, GFS2_METATYPE_DI)) { log_err( _("Found invalid system dinode at block #" "%llu (0x%llx)\n"), (unsigned long long)iblock, (unsigned long long)iblock); gfs2_blockmap_set(bl, iblock, gfs2_block_free); check_n_fix_bitmap(sdp, iblock, 0, gfs2_block_free); inode_put(sysinode); } } if (*sysinode) { ds.q = block_type(iblock); /* If the inode exists but the block is marked free, we might be recovering from a corrupt bitmap. In that case, don't rebuild the inode. Just reuse the inode and fix the bitmap. 
*/ if (ds.q == gfs2_block_free) { log_info( _("The inode exists but the block is not " "marked 'in use'; fixing it.\n")); fsck_blockmap_set(*sysinode, (*sysinode)->i_di.di_num.no_addr, filename, mark); ds.q = mark; if (mark == gfs2_inode_dir) dirtree_insert((*sysinode)->i_di.di_num); } } else log_info( _("System inode for '%s' is corrupt or missing.\n"), filename); /* If there are errors with the inode here, we need to create a new inode and get it all setup - of course, everything will be in lost+found then, but we *need* our system inodes before we can do any of that. */ if (!(*sysinode) || ds.q != mark) { log_err( _("Invalid or missing %s system inode (should be %d, " "is %d).\n"), filename, mark, ds.q); if (query(_("Create new %s system inode? (y/n) "), filename)) { log_err( _("Rebuilding system file \"%s\"\n"), filename); error = builder(sdp); if (error) { log_err( _("Error trying to rebuild system " "file %s: Cannot continue\n"), filename); return error; } fsck_blockmap_set(*sysinode, (*sysinode)->i_di.di_num.no_addr, filename, mark); ds.q = mark; if (mark == gfs2_inode_dir) dirtree_insert((*sysinode)->i_di.di_num); } else { log_err( _("Cannot continue without valid %s inode\n"), filename); return -1; } } if (is_dir(&(*sysinode)->i_di, sdp->gfs1)) { struct block_count bc = {0}; sysdir_fxns.private = &bc; if ((*sysinode)->i_di.di_flags & GFS2_DIF_EXHASH) check_metatree(*sysinode, &sysdir_fxns); else { err = check_linear_dir(*sysinode, (*sysinode)->i_bh, &sysdir_fxns); /* If we encountered an error in our directory check we should still call handle_ip, but return the error later. */ if (err) log_err(_("Error found in %s while checking " "directory entries.\n"), filename); } } error = handle_ip(sdp, *sysinode); return error ? 
error : err; } static int build_a_journal(struct gfs2_sbd *sdp) { char name[256]; int err = 0; /* First, try to delete the journal if it's in jindex */ sprintf(name, "journal%u", sdp->md.journals); gfs2_dirent_del(sdp->md.jiinode, name, strlen(name)); /* Now rebuild it */ err = build_journal(sdp, sdp->md.journals, sdp->md.jiinode); if (err) { log_crit(_("Error %d building journal\n"), err); exit(FSCK_ERROR); } return 0; } static int check_system_inodes(struct gfs2_sbd *sdp) { int journal_count; /******************************************************************* ******* Check the system inode integrity ************* *******************************************************************/ /* Mark the master system dinode as a "dinode" in the block map. All other system dinodes in master will be taken care of by function resuscitate_metalist. But master won't since it has no parent.*/ if (!sdp->gfs1) { fsck_blockmap_set(sdp->master_dir, sdp->master_dir->i_di.di_num.no_addr, "master", gfs2_inode_dir); if (check_system_inode(sdp, &sdp->master_dir, "master", build_master, gfs2_inode_dir)) { stack; return -1; } } /* Mark the root dinode as a "dinode" in the block map as we did for master, since it has no parent. */ fsck_blockmap_set(sdp->md.rooti, sdp->md.rooti->i_di.di_num.no_addr, "root", gfs2_inode_dir); if (check_system_inode(sdp, &sdp->md.rooti, "root", build_root, gfs2_inode_dir)) { stack; return -1; } if (!sdp->gfs1 && check_system_inode(sdp, &sdp->md.inum, "inum", build_inum, gfs2_inode_file)) { stack; return -1; } if (check_system_inode(sdp, &sdp->md.statfs, "statfs", build_statfs, gfs2_inode_file)) { stack; return -1; } if (check_system_inode(sdp, &sdp->md.jiinode, "jindex", build_jindex, (sdp->gfs1 ? 
gfs2_inode_file : gfs2_inode_dir))) { stack; return -1; } if (check_system_inode(sdp, &sdp->md.riinode, "rindex", build_rindex, gfs2_inode_file)) { stack; return -1; } if (check_system_inode(sdp, &sdp->md.qinode, "quota", build_quota, gfs2_inode_file)) { stack; return -1; } if (!sdp->gfs1 && check_system_inode(sdp, &sdp->md.pinode, "per_node", build_per_node, gfs2_inode_dir)) { stack; return -1; } /* We have to play a trick on build_journal: We swap md.journals in order to keep a count of which journal we need to build. */ journal_count = sdp->md.journals; /* gfs1's journals aren't dinode, they're just a bunch of blocks. */ if (sdp->gfs1) { /* gfs1 has four dinodes that are set in the superblock and therefore not linked to anything else. We need to adjust the link counts so pass4 doesn't get confused. */ incr_link_count(sdp->md.statfs->i_di.di_num, NULL, _("gfs1 statfs inode")); incr_link_count(sdp->md.jiinode->i_di.di_num, NULL, _("gfs1 jindex inode")); incr_link_count(sdp->md.riinode->i_di.di_num, NULL, _("gfs1 rindex inode")); incr_link_count(sdp->md.qinode->i_di.di_num, NULL, _("gfs1 quota inode")); return 0; } for (sdp->md.journals = 0; sdp->md.journals < journal_count; sdp->md.journals++) { char jname[16]; sprintf(jname, "journal%d", sdp->md.journals); if (check_system_inode(sdp, &sdp->md.journal[sdp->md.journals], jname, build_a_journal, gfs2_inode_file)) { stack; return -1; } } return 0; } static int pass1_process_bitmap(struct gfs2_sbd *sdp, struct rgrp_tree *rgd, uint64_t *ibuf, unsigned n) { struct gfs2_buffer_head *bh; unsigned i; uint64_t block; struct gfs2_inode *ip; uint8_t q; for (i = 0; i < n; i++) { int is_inode; uint32_t check_magic; block = ibuf[i]; /* skip gfs1 rindex indirect blocks */ if (sdp->gfs1 && blockfind(&gfs1_rindex_blks, block)) { log_debug(_("Skipping rindex indir block " "%lld (0x%llx)\n"), (unsigned long long)block, (unsigned long long)block); continue; } warm_fuzzy_stuff(block); if (fsck_abort) { /* if asked to abort */ 
gfs2_special_free(&gfs1_rindex_blks); return FSCK_OK; } if (skip_this_pass) { printf( _("Skipping pass 1 is not a good idea.\n")); skip_this_pass = FALSE; fflush(stdout); } if (fsck_system_inode(sdp, block)) { log_debug(_("Already processed system inode " "%lld (0x%llx)\n"), (unsigned long long)block, (unsigned long long)block); continue; } bh = bread(sdp, block); is_inode = 0; if (gfs2_check_meta(bh, GFS2_METATYPE_DI) == 0) is_inode = 1; check_magic = ((struct gfs2_meta_header *) (bh->b_data))->mh_magic; q = block_type(block); if (q != gfs2_block_free) { if (be32_to_cpu(check_magic) == GFS2_MAGIC && sdp->gfs1 && !is_inode) { log_debug("Block 0x%llx assumed to be " "previously processed GFS1 " "non-dinode metadata.\n", (unsigned long long)block); brelse(bh); continue; } log_err( _("Found a duplicate inode block at #%llu " "(0x%llx) previously marked as a %s\n"), (unsigned long long)block, (unsigned long long)block, block_type_string(q)); ip = fsck_inode_get(sdp, bh); if (is_inode && ip->i_di.di_num.no_addr == block) add_duplicate_ref(ip, block, ref_is_inode, 0, INODE_VALID); else log_info(_("dinum.no_addr is wrong, so I " "assume the bitmap is just " "wrong.\n")); fsck_inode_put(&ip); brelse(bh); continue; } if (!is_inode) { if (be32_to_cpu(check_magic) == GFS2_MAGIC) { /* In gfs2, a bitmap mark of 2 means an inode, but in gfs1 it means any metadata. So if this is gfs1 and not an inode, it may be okay. If it's non-dinode metadata, it will be referenced by an inode, so we need to skip it here and it will be sorted out when the referencing inode is checked. 
*/ if (sdp->gfs1) { log_debug( _("Deferring GFS1 " "metadata block #" "%" PRIu64" (0x%" PRIx64 ")\n"), block, block); brelse(bh); continue; } } log_err( _("Found invalid inode at block #" "%llu (0x%llx)\n"), (unsigned long long)block, (unsigned long long)block); check_n_fix_bitmap(sdp, block, 0, gfs2_block_free); } else if (handle_di(sdp, bh) < 0) { stack; brelse(bh); gfs2_special_free(&gfs1_rindex_blks); return FSCK_ERROR; } /* Ignore everything else - they should be hit by the handle_di step. Don't check NONE either, because check_meta passes everything if GFS2_METATYPE_NONE is specified. Hopefully, other metadata types such as indirect blocks will be handled when the inode itself is processed, and if it's not, it should be caught in pass5. */ brelse(bh); } return 0; } static int pass1_process_rgrp(struct gfs2_sbd *sdp, struct rgrp_tree *rgd) { unsigned k, n, i; uint64_t *ibuf = malloc(sdp->bsize * GFS2_NBBY * sizeof(uint64_t)); int ret; for (k = 0; k < rgd->ri.ri_length; k++) { n = lgfs2_bm_scan(rgd, k, ibuf, GFS2_BLKST_DINODE); if (n) { ret = pass1_process_bitmap(sdp, rgd, ibuf, n); if (ret) return ret; } /* For GFS1, we have to count the "free meta" blocks in the resource group and mark them specially so we can count them properly in pass5. 
*/ if (!sdp->gfs1) continue; n = lgfs2_bm_scan(rgd, k, ibuf, GFS2_BLKST_UNLINKED); for (i = 0; i < n; i++) gfs2_blockmap_set(bl, ibuf[i], gfs2_freemeta); } free(ibuf); return 0; } /** * pass1 - walk through inodes and check inode state * * this walk can be done using root inode and depth first search, * watching for repeat inode numbers * * format & type * link count * duplicate blocks * bad blocks * inodes size * dir info */ int pass1(struct gfs2_sbd *sdp) { struct osi_node *n, *next = NULL; struct rgrp_tree *rgd; uint64_t i; uint64_t rg_count = 0; int ret; osi_list_init(&gfs1_rindex_blks.list); /* FIXME: In the gfs fsck, we had to mark things like the * journals and indices and such as 'other_meta' - in gfs2, * the journals are files and are found in the normal file * sweep - is there any metadata we need to mark here before * the sweeps start that we won't find otherwise? */ /* Make sure the system inodes are okay & represented in the bitmap. */ check_system_inodes(sdp); /* So, do we do a depth first search starting at the root * inode, or use the rg bitmaps, or just read every fs block * to find the inodes? If we use the depth first search, why * have pass3 at all - if we use the rg bitmaps, pass5 is at * least partially invalidated - if we read every fs block, * things will probably be intolerably slow. The current fsck * uses the rg bitmaps, so maybe that's the best way to start * things - we can change the method later if necessary. 
*/ for (n = osi_first(&sdp->rgtree); n; n = next, rg_count++) { next = osi_next(n); log_debug( _("Checking metadata in Resource Group #%llu\n"), (unsigned long long)rg_count); rgd = (struct rgrp_tree *)n; for (i = 0; i < rgd->ri.ri_length; i++) { log_debug( _("rgrp block %lld (0x%llx) " "is now marked as 'rgrp data'\n"), rgd->ri.ri_addr + i, rgd->ri.ri_addr + i); if (gfs2_blockmap_set(bl, rgd->ri.ri_addr + i, gfs2_indir_blk)) { stack; gfs2_special_free(&gfs1_rindex_blks); return FSCK_ERROR; } /* rgrps and bitmaps don't have bits to represent their blocks, so don't do this: check_n_fix_bitmap(sdp, rgd->ri.ri_addr + i, 0, gfs2_meta_rgrp);*/ } ret = pass1_process_rgrp(sdp, rgd); if (ret) return ret; } gfs2_special_free(&gfs1_rindex_blks); return FSCK_OK; } gfs2-utils/gfs2/fsck/pass1b.c0000664000175000017500000005243512171730231014661 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #define _(String) gettext(String) #include "libgfs2.h" #include "fsck.h" #include "osi_list.h" #include "util.h" #include "metawalk.h" #include "inode_hash.h" struct fxn_info { uint64_t block; int found; int ea_only; /* The only dups were found in EAs */ }; struct dup_handler { struct duptree *dt; int ref_inode_count; int ref_count; }; static void log_inode_reference(struct duptree *dt, osi_list_t *tmp, int inval) { char reftypestring[32]; struct inode_with_dups *id; id = osi_list_entry(tmp, struct inode_with_dups, list); if (id->dup_count == 1) sprintf(reftypestring, "as %s", reftypes[get_ref_type(id)]); else sprintf(reftypestring, "%d/%d/%d/%d", id->reftypecount[ref_is_inode], id->reftypecount[ref_as_data], id->reftypecount[ref_as_meta], id->reftypecount[ref_as_ea]); if (inval) log_warn( _("Invalid ")); log_warn( _("Inode %s (%lld/0x%llx) has %d reference(s) to " "block %llu (0x%llx) (%s)\n"), id->name, (unsigned long long)id->block_no, (unsigned long long)id->block_no, id->dup_count, (unsigned long long)dt->block, (unsigned long 
long)dt->block, reftypestring); } /* delete_all_dups - delete all duplicate records for a given inode */ /* * resolve_dup_references - resolve all but the last dinode that has a * duplicate reference to a given block. * * @sdp - pointer to the superblock structure * @dt - pointer to the duplicate reference rbtree to use * @ref_list - list of duplicate references to be resolved (invalid or valid) * @dh - duplicate handler * inval - The references on this ref_list are invalid. We prefer to delete * these first before resorting to deleting valid dinodes. * acceptable_ref - Delete dinodes that reference the given block as anything * _but_ this type. Try to save references as this type. */ static void resolve_dup_references(struct gfs2_sbd *sdp, struct duptree *dt, osi_list_t *ref_list, struct dup_handler *dh, int inval, int acceptable_ref) { struct gfs2_inode *ip; struct inode_with_dups *id; osi_list_t *tmp, *x; struct metawalk_fxns pass1b_fxns_delete = { .private = NULL, .check_metalist = delete_metadata, .check_data = delete_data, .check_leaf = delete_leaf, .check_eattr_indir = delete_eattr_indir, .check_eattr_leaf = delete_eattr_leaf, .check_eattr_entry = delete_eattr_entry, .check_eattr_extentry = delete_eattr_extentry, }; enum dup_ref_type this_ref; struct inode_info *ii; int found_good_ref = 0; uint8_t q; osi_list_foreach_safe(tmp, ref_list, x) { if (skip_this_pass || fsck_abort) return; id = osi_list_entry(tmp, struct inode_with_dups, list); dh->dt = dt; if (dh->ref_inode_count == 1) /* down to the last reference */ return; this_ref = get_ref_type(id); q = block_type(id->block_no); if (inval) log_warn( _("Invalid ")); /* FIXME: If we already found an acceptable reference to this * block, we should really duplicate the block and fix all * references to it in this inode. Unfortunately, we would * have to traverse the entire metadata tree to do that. 
*/
		/* Keep the first reference whose type matches what the block
		   really is; acceptable_ref == ref_types means no type is
		   acceptable, so this branch is skipped entirely. */
		if (acceptable_ref != ref_types && /* If we're nuking all but
						      an acceptable reference
						      type and */
		    this_ref == acceptable_ref && /* this ref is acceptable */
		    !found_good_ref) { /* We haven't found a good reference */
			/* If this is an invalid inode, but not on the invalid
			   list, it's better to delete it. */
			if (q != gfs2_inode_invalid) {
				found_good_ref = 1;
				log_warn( _("Inode %s (%lld/0x%llx)'s "
					    "reference to block %llu (0x%llx) "
					    "as '%s' is acceptable.\n"),
					  id->name,
					  (unsigned long long)id->block_no,
					  (unsigned long long)id->block_no,
					  (unsigned long long)dt->block,
					  (unsigned long long)dt->block,
					  reftypes[this_ref]);
				continue; /* don't delete the dinode */
			}
		}
		/* If this reference is from a system inode, for example, if
		   it's data or metadata inside a journal, the reference
		   should take priority over user dinodes that reference the
		   block. */
		if (!found_good_ref && fsck_system_inode(sdp, id->block_no)) {
			found_good_ref = 1;
			continue; /* don't delete the dinode */
		}
		log_warn( _("Inode %s (%lld/0x%llx) references block "
			    "%llu (0x%llx) as '%s', but the block is "
			    "really %s.\n"),
			  id->name, (unsigned long long)id->block_no,
			  (unsigned long long)id->block_no,
			  (unsigned long long)dt->block,
			  (unsigned long long)dt->block,
			  reftypes[this_ref], reftypes[acceptable_ref]);
		if (!(query( _("Okay to delete %s inode %lld (0x%llx)? "
			       "(y/n) "),
			     (inval ? _("invalidated") : ""),
			     (unsigned long long)id->block_no,
			     (unsigned long long)id->block_no))) {
			log_warn( _("The bad inode was not cleared."));
			/* delete the list entry so we don't leak memory but
			   leave the reference count. If we decrement the
			   ref count, we could get down to 1 and the dinode
			   would be changed without a 'Yes' answer. */
			/* (dh->ref_inode_count)--;*/
			dup_listent_delete(dt, id);
			continue;
		}
		if (q == gfs2_block_free)
			log_warn( _("Inode %lld (0x%llx) was previously "
				    "deleted.\n"),
				  (unsigned long long)id->block_no,
				  (unsigned long long)id->block_no);
		else
			log_warn(_("Pass1b is deleting inode %lld (0x%llx).\n"),
				 (unsigned long long)id->block_no,
				 (unsigned long long)id->block_no);

		ip = fsck_load_inode(sdp, id->block_no);
		/* If we've already deleted this dinode, don't try to delete
		   it again.  That could free blocks that used to be duplicate
		   references that are now resolved (and gone). */
		if (q != gfs2_block_free) {
			/* Clear the EAs for the inode first */
			check_inode_eattr(ip, &pass1b_fxns_delete);
			/* If the reference was as metadata or data, we've got
			   a corrupt dinode that will be deleted. */
			if (inval || id->reftypecount[ref_as_data] ||
			    id->reftypecount[ref_as_meta]) {
				/* Remove the inode from the inode tree */
				ii = inodetree_find(ip->i_di.di_num.no_addr);
				if (ii)
					inodetree_delete(ii);
				fsck_blockmap_set(ip, ip->i_di.di_num.no_addr,
						  _("duplicate referencing bad"),
						  gfs2_inode_invalid);
				/* We delete the dup_handler inode count and
				   duplicate id BEFORE clearing the metadata,
				   because if this is the last reference to
				   this metadata block, we need to traverse the
				   tree and free the data blocks it references.
				   However, we don't want to delete other
				   duplicates that may be used by other
				   dinodes. */
				(dh->ref_inode_count)--;
				/* FIXME: other option should be to duplicate
				   the block for each duplicate and point the
				   metadata at the cloned blocks */
				check_metatree(ip, &pass1b_fxns_delete);
			}
		}
		/* Now we've got to go through and delete any other duplicate
		   references from this dinode we're deleting.  If we don't,
		   pass1b will discover the other duplicate record, try to
		   delete this dinode a second time, and this time its earlier
		   duplicate references won't be seen as duplicates anymore
		   (because they were eliminated earlier in pass1b).  And so
		   the blocks will be mistakenly freed, when, in fact, they're
		   still being referenced by a valid dinode. */
		delete_all_dups(ip);
		fsck_inode_put(&ip); /* out, brelse, free */
	}
	return;
}

/* revise_dup_handler - get current information about a duplicate reference
 *
 * Function resolve_dup_references can delete dinodes that reference blocks
 * which may have duplicate references.  Therefore, the duplicate tree is
 * constantly being changed.  This function revises the duplicate handler so
 * that it accurately matches what's in the duplicate tree regarding this block
 *
 * @dup_blk - block number to look up in the duplicate tree
 * @dh - handler to refresh; its counts are zeroed and dt is set to NULL when
 *       the block no longer has an entry in the tree
 */
static void revise_dup_handler(uint64_t dup_blk, struct dup_handler *dh)
{
	osi_list_t *tmp;
	struct duptree *dt;
	struct inode_with_dups *id;

	/* Start from scratch so a vanished entry reads as "no references" */
	dh->ref_inode_count = 0;
	dh->ref_count = 0;
	dh->dt = NULL;

	dt = dupfind(dup_blk);
	if (!dt)
		return;

	dh->dt = dt;
	/* Count the duplicate references, both valid and invalid */
	osi_list_foreach(tmp, &dt->ref_invinode_list) {
		id = osi_list_entry(tmp, struct inode_with_dups, list);
		dh->ref_inode_count++;
		dh->ref_count += id->dup_count;
	}
	osi_list_foreach(tmp, &dt->ref_inode_list) {
		id = osi_list_entry(tmp, struct inode_with_dups, list);
		dh->ref_inode_count++;
		dh->ref_count += id->dup_count;
	}
}

/* handle_dup_blk - handle a duplicate block reference.
 *
 * This function should resolve and delete the duplicate block reference given,
 * iow dt.
*/
/* Resolution runs in up to three resolve_dup_references() steps, each
 * followed by revise_dup_handler() because the duplicate tree changes
 * underneath us; the steps are skipped once dh.ref_count drops to 1 or less.
 * The block number is cached in dup_blk and used for the later lookups.
 *
 * Returns: always 0
 */
static int handle_dup_blk(struct gfs2_sbd *sdp, struct duptree *dt)
{
	osi_list_t *tmp;
	struct gfs2_inode *ip;
	struct inode_with_dups *id;
	struct dup_handler dh = {0};
	struct gfs2_buffer_head *bh;
	uint32_t cmagic, ctype;
	enum dup_ref_type acceptable_ref;
	uint64_t dup_blk;

	dup_blk = dt->block;
	revise_dup_handler(dup_blk, &dh);

	/* Log the duplicate references */
	log_notice( _("Block %llu (0x%llx) has %d inodes referencing it"
		   " for a total of %d duplicate references:\n"),
		   (unsigned long long)dt->block,
		   (unsigned long long)dt->block,
		   dh.ref_inode_count, dh.ref_count);

	osi_list_foreach(tmp, &dt->ref_invinode_list)
		log_inode_reference(dt, tmp, 1);
	osi_list_foreach(tmp, &dt->ref_inode_list)
		log_inode_reference(dt, tmp, 0);

	/* Figure out the block type to see if we can eliminate references
	   to a different type. In other words, if the duplicate block looks
	   like metadata, we can delete dinodes that reference it as data.
	   If the block doesn't look like metadata, we can eliminate any
	   references to it as metadata.  Dinodes with such references are
	   clearly corrupt and need to be deleted.
	   And if we're left with a single reference, problem solved. */
	bh = bread(sdp, dt->block);
	/* Both header fields are kept in on-disk byte order here and are
	   converted with be32_to_cpu() at each comparison below. */
	cmagic = ((struct gfs2_meta_header *)(bh->b_data))->mh_magic;
	ctype = ((struct gfs2_meta_header *)(bh->b_data))->mh_type;
	brelse(bh);

	/* If this is a dinode, any references to it (except in directory
	   entries) are invalid and should be deleted. */
	if (be32_to_cpu(cmagic) == GFS2_MAGIC &&
	    be32_to_cpu(ctype) == GFS2_METATYPE_DI)
		acceptable_ref = ref_is_inode;
	else if (be32_to_cpu(cmagic) == GFS2_MAGIC &&
		 (be32_to_cpu(ctype) == GFS2_METATYPE_EA ||
		  be32_to_cpu(ctype) == GFS2_METATYPE_ED))
		acceptable_ref = ref_as_ea;
	else if (be32_to_cpu(cmagic) == GFS2_MAGIC &&
		 be32_to_cpu(ctype) <= GFS2_METATYPE_QC)
		acceptable_ref = ref_as_meta;
	else
		acceptable_ref = ref_as_data;

	/* A single reference to the block implies a possible situation where
	   a data pointer points to a metadata block.  In other words, the
	   duplicate reference in the file system is (1) Metadata block X and
	   (2) A dinode reference such as a data pointer pointing to block X.
	   We can't really check for that in pass1 because user data might
	   just _look_ like metadata by coincidence, and at the time we're
	   checking, we might not have processed the referenced block.
	   Here in pass1b we're sure. */
	/* Another possibility here is that there is a single reference
	   because all the other metadata references were in inodes that got
	   invalidated for other reasons, such as bad pointers.  So we need to
	   make sure at this point that any inode deletes reverse out any
	   duplicate reference before we get to this point. */

	/* Step 1 - eliminate references from inodes that are not valid.
	 *          This may be because they were deleted due to corruption.
	 *          All block types are unacceptable, so we use ref_types.
	 */
	if (dh.ref_count > 1) {
		log_debug( _("----------------------------------------------\n"
			     "Step 1: Eliminate references to block %llu "
			     "(0x%llx) that were previously marked "
			     "invalid.\n"),
			   (unsigned long long)dt->block,
			   (unsigned long long)dt->block);
		resolve_dup_references(sdp, dt, &dt->ref_invinode_list,
				       &dh, 1, ref_types);
		revise_dup_handler(dup_blk, &dh);
	}
	/* Step 2 - eliminate reference from inodes that reference it as the
	 *          wrong type.  For example, a data file referencing it as
	 *          a data block, but it's really a metadata block.  Or a
	 *          directory inode referencing a data block as a leaf block.
	 */
	if (dh.ref_count > 1) {
		log_debug( _("----------------------------------------------\n"
			     "Step 2: Eliminate references to block %llu "
			     "(0x%llx) that need the wrong block type.\n"),
			   (unsigned long long)dt->block,
			   (unsigned long long)dt->block);
		resolve_dup_references(sdp, dt, &dt->ref_inode_list, &dh, 0,
				       acceptable_ref);
		revise_dup_handler(dup_blk, &dh);
	}
	/* Step 3 - We have multiple dinodes referencing it as the correct
	 *          type.  Just blast one of them.
	 *          All block types are fair game, so we use ref_types.
	 */
	if (dh.ref_count > 1) {
		log_debug( _("----------------------------------------------\n"
			     "Step 3: Choose one reference to block %llu "
			     "(0x%llx) to keep.\n"),
			   (unsigned long long)dt->block,
			   (unsigned long long)dt->block);
		resolve_dup_references(sdp, dt, &dt->ref_inode_list, &dh, 0,
				       ref_types);
		revise_dup_handler(dup_blk, &dh);
	}
	/* If there's still a last remaining reference, and it's a valid
	   reference, use it to determine the correct block type for our
	   blockmap and bitmap. */
	/* dt is only dereferenced here when the refreshed handler still
	   counts exactly one referencing inode. */
	if (dh.ref_inode_count == 1 && !osi_list_empty(&dt->ref_inode_list)) {
		uint8_t q;

		log_notice( _("Block %llu (0x%llx) has only one remaining "
			      "valid inode referencing it.\n"),
			    (unsigned long long)dup_blk,
			    (unsigned long long)dup_blk);
		/* If we're down to a single reference (and not all references
		   deleted, which may be the case of an inode that has only
		   itself and a reference), we need to reset the block type
		   from invalid to data or metadata. Start at the first one
		   in the list, not the structure's place holder. */
		tmp = dt->ref_inode_list.next;
		id = osi_list_entry(tmp, struct inode_with_dups, list);
		log_debug( _("----------------------------------------------\n"
			     "Step 4. Set block type based on the remaining "
			     "reference in inode %lld (0x%llx).\n"),
			   (unsigned long long)id->block_no,
			   (unsigned long long)id->block_no);
		ip = fsck_load_inode(sdp, id->block_no);

		/* The new block map type depends on how the surviving inode
		   references the block; the tests run in priority order:
		   inode, data, metadata, then extended attribute. */
		q = block_type(id->block_no);
		if (q == gfs2_inode_invalid) {
			log_debug( _("The remaining reference inode %lld "
				     "(0x%llx) is marked invalid: Marking "
				     "the block as free.\n"),
				   (unsigned long long)id->block_no,
				   (unsigned long long)id->block_no);
			fsck_blockmap_set(ip, dt->block,
					  _("reference-repaired leaf"),
					  gfs2_block_free);
		} else if (id->reftypecount[ref_is_inode]) {
			set_ip_blockmap(ip, 0); /* 0=do not add to dirtree */
		} else if (id->reftypecount[ref_as_data]) {
			fsck_blockmap_set(ip, dt->block,
					  _("reference-repaired data"),
					  gfs2_block_used);
		} else if (id->reftypecount[ref_as_meta]) {
			if (is_dir(&ip->i_di, sdp->gfs1))
				fsck_blockmap_set(ip, dt->block,
						  _("reference-repaired leaf"),
						  gfs2_leaf_blk);
			else
				fsck_blockmap_set(ip, dt->block,
						  _("reference-repaired "
						    "indirect"),
						  gfs2_indir_blk);
		} else
			fsck_blockmap_set(ip, dt->block,
					  _("reference-repaired extended "
					    "attribute"),
					  gfs2_meta_eattr);
		fsck_inode_put(&ip); /* out, brelse, free */
		log_debug(_("Done with duplicate reference to block 0x%llx\n"),
			  (unsigned long long)dt->block);
		dup_delete(dt);
	} else {
		/* They may have answered no and not fixed all references. */
		log_debug( _("All duplicate references to block 0x%llx were "
			     "processed.\n"), (unsigned long long)dup_blk);
		if (dh.ref_count) {
			/* NOTE(review): no dup_delete() on this path, so the
			   tree entry is still present when we return. */
			log_debug(_("Done with duplicate reference to block "
				    "0x%llx, but %d references remain.\n"),
				  (unsigned long long)dup_blk, dh.ref_count);
		} else {
			log_notice( _("Block %llu (0x%llx) has no more "
				      "references; Marking as 'free'.\n"),
				    (unsigned long long)dup_blk,
				    (unsigned long long)dup_blk);
			if (dh.dt)
				dup_delete(dh.dt);
			/* Now fix the block type of the block in question. */
			gfs2_blockmap_set(bl, dup_blk, gfs2_block_free);
			check_n_fix_bitmap(sdp, dup_blk, 0, gfs2_block_free);
		}
	}
	return 0;
}

/* Leaf-block callback for find_block_ref(): records the block as a metadata
   reference from ip.  @private is unused. */
static int check_leaf_refs(struct gfs2_inode *ip, uint64_t block,
			   void *private)
{
	return add_duplicate_ref(ip, block, ref_as_meta, 1, INODE_VALID);
}

/* Indirect-block callback for find_block_ref(): always reports the block as
   valid and not a duplicate to the walker, then records it as a metadata
   reference from ip.  @bh, @h and @private are unused. */
static int check_metalist_refs(struct gfs2_inode *ip, uint64_t block,
			       struct gfs2_buffer_head **bh, int h,
			       int *is_valid, int *was_duplicate,
			       void *private)
{
	*was_duplicate = 0;
	*is_valid = 1;
	return add_duplicate_ref(ip, block, ref_as_meta, 1, INODE_VALID);
}

/* Data-block callback for find_block_ref(): records the block as a data
   reference from ip.  @metablock and @private are unused. */
static int check_data_refs(struct gfs2_inode *ip, uint64_t metablock,
			   uint64_t block, void *private)
{
	return add_duplicate_ref(ip, block, ref_as_data, 1, INODE_VALID);
}

/* Indirect extended-attribute block callback: records the block as an EA
   reference and, when that returns 0, reads the block into *bh.
   Returns the add_duplicate_ref() result. */
static int check_eattr_indir_refs(struct gfs2_inode *ip, uint64_t block,
				  uint64_t parent,
				  struct gfs2_buffer_head **bh, void *private)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	int error;

	error = add_duplicate_ref(ip, block, ref_as_ea, 1, INODE_VALID);
	if (!error)
		*bh = bread(sdp, block);

	return error;
}

/* Extended-attribute leaf block callback: same behaviour as
   check_eattr_indir_refs(). */
static int check_eattr_leaf_refs(struct gfs2_inode *ip, uint64_t block,
				 uint64_t parent, struct gfs2_buffer_head **bh,
				 void *private)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	int error;

	error = add_duplicate_ref(ip, block, ref_as_ea, 1, INODE_VALID);
	if (!error)
		*bh = bread(sdp, block);
	return error;
}

/* Extended-attribute entry callback: nothing to record; always returns 0. */
static int check_eattr_entry_refs(struct gfs2_inode *ip,
				  struct gfs2_buffer_head *leaf_bh,
				  struct gfs2_ea_header *ea_hdr,
				  struct gfs2_ea_header *ea_hdr_prev,
				  void *private)
{
	return 0;
}

/* Extended-attribute data pointer callback: converts the big-endian pointer
   to a block number and records it as an EA reference from ip. */
static int check_eattr_extentry_refs(struct gfs2_inode *ip,
				     uint64_t *ea_data_ptr,
				     struct gfs2_buffer_head *leaf_bh,
				     struct gfs2_ea_header *ea_hdr,
				     struct gfs2_ea_header *ea_hdr_prev,
				     void *private)
{
	uint64_t block = be64_to_cpu(*ea_data_ptr);

	return add_duplicate_ref(ip, block, ref_as_ea, 1, INODE_VALID);
}

/* Finds all references to duplicate blocks in the metadata of the dinode
   at block number @inode */
static int find_block_ref(struct gfs2_sbd *sdp, uint64_t inode)
{
struct gfs2_inode *ip; int error = 0; struct metawalk_fxns find_refs = { .private = NULL, .check_leaf = check_leaf_refs, .check_metalist = check_metalist_refs, .check_data = check_data_refs, .check_eattr_indir = check_eattr_indir_refs, .check_eattr_leaf = check_eattr_leaf_refs, .check_eattr_entry = check_eattr_entry_refs, .check_eattr_extentry = check_eattr_extentry_refs, }; ip = fsck_load_inode(sdp, inode); /* bread, inode_get */ /* double-check the meta header just to be sure it's metadata */ if (ip->i_di.di_header.mh_magic != GFS2_MAGIC || ip->i_di.di_header.mh_type != GFS2_METATYPE_DI) { log_debug( _("Block %lld (0x%llx) is not gfs2 metadata.\n"), (unsigned long long)inode, (unsigned long long)inode); error = 1; goto out; } /* Check to see if this inode was referenced by another by mistake */ add_duplicate_ref(ip, inode, ref_is_inode, 1, INODE_VALID); /* Check this dinode's metadata for references to known duplicates */ error = check_metatree(ip, &find_refs); if (error < 0) stack; /* Check for ea references in the inode */ if (!error) error = check_inode_eattr(ip, &find_refs); out: fsck_inode_put(&ip); /* out, brelse, free */ return error; } /* Pass 1b handles finding the previous inode for a duplicate block * When found, store the inodes pointing to the duplicate block for * use in pass2 */ int pass1b(struct gfs2_sbd *sdp) { struct duptree *dt; uint64_t i; uint8_t q; struct osi_node *n; int rc = FSCK_OK; log_info( _("Looking for duplicate blocks...\n")); /* If there were no dups in the bitmap, we don't need to do anymore */ if (dup_blocks.osi_node == NULL) { log_info( _("No duplicate blocks found\n")); return FSCK_OK; } /* Rescan the fs looking for pointers to blocks that are in * the duplicate block map */ log_info( _("Scanning filesystem for inodes containing duplicate blocks...\n")); log_debug( _("Filesystem has %llu (0x%llx) blocks total\n"), (unsigned long long)last_fs_block, (unsigned long long)last_fs_block); for (i = 0; i < last_fs_block; i++) { if 
(skip_this_pass || fsck_abort) /* if asked to skip the rest */ goto out; if (dups_found_first == dups_found) { log_debug(_("Found all %d original references to " "duplicates.\n"), dups_found); break; } q = block_type(i); if (q < gfs2_inode_dir) continue; if (q > gfs2_inode_invalid) continue; if (q == gfs2_inode_invalid) log_debug( _("Checking invalidated duplicate dinode " "%lld (0x%llx)\n"), (unsigned long long)i, (unsigned long long)i); warm_fuzzy_stuff(i); if (find_block_ref(sdp, i) < 0) { stack; rc = FSCK_ERROR; goto out; } } /* Fix dups here - it's going to slow things down a lot to fix * it later */ log_info( _("Handling duplicate blocks\n")); out: /* Resolve all duplicates by clearing out the dup tree */ while ((n = osi_first(&dup_blocks))) { dt = (struct duptree *)n; if (!skip_this_pass && !rc) /* no error & not asked to skip the rest */ handle_dup_blk(sdp, dt); } return rc; } gfs2-utils/gfs2/fsck/pass1c.c0000664000175000017500000001777512154127655014705 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #define _(String) gettext(String) #include "libgfs2.h" #include "fsck.h" #include "util.h" #include "metawalk.h" struct metawalk_fxns pass1c_fxns_delete = { .private = NULL, .check_eattr_indir = delete_eattr_indir, .check_eattr_leaf = delete_eattr_leaf, }; static int remove_eattr_entry(struct gfs2_sbd *sdp, struct gfs2_buffer_head *leaf_bh, struct gfs2_ea_header *curr, struct gfs2_ea_header *prev) { if (!prev) curr->ea_type = GFS2_EATYPE_UNUSED; else { uint32_t tmp32 = be32_to_cpu(curr->ea_rec_len) + be32_to_cpu(prev->ea_rec_len); prev->ea_rec_len = cpu_to_be32(tmp32); if (curr->ea_flags & GFS2_EAFLAG_LAST) prev->ea_flags |= GFS2_EAFLAG_LAST; } log_err( _("Bad Extended Attribute at block #%llu" " (0x%llx) removed.\n"), (unsigned long long)leaf_bh->b_blocknr, (unsigned long long)leaf_bh->b_blocknr); bmodified(leaf_bh); return 0; } static int ask_remove_eattr_entry(struct gfs2_sbd *sdp, struct gfs2_buffer_head *leaf_bh, 
struct gfs2_ea_header *curr,
				  struct gfs2_ea_header *prev,
				  int fix_curr, int fix_curr_len)
{
	if (query( _("Remove the bad Extended Attribute entry? (y/n) "))) {
		/* fix_curr: flag this header as the last one in the block */
		if (fix_curr)
			curr->ea_flags |= GFS2_EAFLAG_LAST;
		/* fix_curr_len: stretch the record to the end of the block */
		if (fix_curr_len) {
			uint32_t max_size = sdp->sd_sb.sb_bsize;
			uint32_t offset = (uint32_t)(((unsigned long)curr) -
					     ((unsigned long)leaf_bh->b_data));
			curr->ea_rec_len = cpu_to_be32(max_size - offset);
		}
		if (remove_eattr_entry(sdp, leaf_bh, curr, prev)) {
			stack;
			return -1;
		}
	} else {
		log_err( _("Bad Extended Attribute not removed.\n"));
	}
	/* 1 is returned for both the "removed" and the "declined" case */
	return 1;
}

/* ask_remove_eattr - ask the user, then delete all of an inode's EAs
 *
 * @ip: the inode whose extended attributes are bad
 *
 * On "yes" the inode's EA tree is walked with the pass1c_fxns_delete
 * callbacks and the inode buffer is marked modified.
 *
 * Returns: 1 if the user agreed, 0 otherwise
 */
static int ask_remove_eattr(struct gfs2_inode *ip)
{
	if (query( _("Remove the bad Extended Attribute? (y/n) "))) {
		check_inode_eattr(ip, &pass1c_fxns_delete);
		bmodified(ip->i_bh);
		log_err( _("Bad Extended Attribute removed.\n"));
		return 1;
	}
	log_err( _("Bad Extended Attribute not removed.\n"));
	return 0;
}

/* check_eattr_indir - validate an EA indirect block
 *
 * @ip: inode owning the extended attributes
 * @block: block number of the indirect block
 * @parent: not used by this function
 * @bh: set to the buffer read for @block when the block looks valid
 * @private: not used by this function
 *
 * Returns: 0 with *bh set when the block is in range and typed
 * gfs2_indir_blk; otherwise the result of ask_remove_eattr()
 * (*bh is left untouched in that case)
 */
static int check_eattr_indir(struct gfs2_inode *ip, uint64_t block,
			     uint64_t parent, struct gfs2_buffer_head **bh,
			     void *private)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	uint8_t q;
	struct gfs2_buffer_head *indir_bh = NULL;

	if (!valid_block(sdp, block)) {
		log_err( _("Extended attributes indirect block #%llu"
			" (0x%llx) for inode #%llu"
			" (0x%llx) is invalid...removing\n"),
			(unsigned long long)block,
			(unsigned long long)block,
			(unsigned long long)ip->i_di.di_num.no_addr,
			(unsigned long long)ip->i_di.di_num.no_addr);
		return ask_remove_eattr(ip);
	}
	q = block_type(block);
	if (q != gfs2_indir_blk) {
		log_err( _("Extended attributes indirect block #%llu"
			" (0x%llx) for inode #%llu"
			" (0x%llx) is invalid.\n"),
			(unsigned long long)block,
			(unsigned long long)block,
			(unsigned long long)ip->i_di.di_num.no_addr,
			(unsigned long long)ip->i_di.di_num.no_addr);
		return ask_remove_eattr(ip);
	}
	else
		indir_bh = bread(sdp, block);

	*bh = indir_bh;
	return 0;
}

/* check_eattr_leaf - validate an EA leaf block
 *
 * Returns: 0 with *bh set when the block is in range and typed
 * gfs2_meta_eattr; otherwise the result of ask_remove_eattr()
 */
static int check_eattr_leaf(struct gfs2_inode *ip, uint64_t block,
			    uint64_t parent, struct gfs2_buffer_head **bh,
			    void *private)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	uint8_t q;

	if 
(!valid_block(sdp, block)) { log_err( _("Extended attributes block for inode #%llu" " (0x%llx) is invalid.\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); return ask_remove_eattr(ip); } q = block_type(block); if (q != gfs2_meta_eattr) { log_err( _("Extended attributes block for inode #%llu" " (0x%llx) invalid.\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); return ask_remove_eattr(ip); } else *bh = bread(sdp, block); return 0; } static int check_eattr_entry(struct gfs2_inode *ip, struct gfs2_buffer_head *leaf_bh, struct gfs2_ea_header *ea_hdr, struct gfs2_ea_header *ea_hdr_prev, void *private) { struct gfs2_sbd *sdp = ip->i_sbd; char ea_name[256]; uint32_t offset = (uint32_t)(((unsigned long)ea_hdr) - ((unsigned long)leaf_bh->b_data)); uint32_t max_size = sdp->sd_sb.sb_bsize; if (!ea_hdr->ea_name_len){ log_err( _("EA has name length of zero\n")); return ask_remove_eattr_entry(sdp, leaf_bh, ea_hdr, ea_hdr_prev, 1, 1); } if (offset + be32_to_cpu(ea_hdr->ea_rec_len) > max_size){ log_err( _("EA rec length too long\n")); return ask_remove_eattr_entry(sdp, leaf_bh, ea_hdr, ea_hdr_prev, 1, 1); } if (offset + be32_to_cpu(ea_hdr->ea_rec_len) == max_size && (ea_hdr->ea_flags & GFS2_EAFLAG_LAST) == 0){ log_err( _("last EA has no last entry flag\n")); return ask_remove_eattr_entry(sdp, leaf_bh, ea_hdr, ea_hdr_prev, 0, 0); } if (!ea_hdr->ea_name_len){ log_err( _("EA has name length of zero\n")); return ask_remove_eattr_entry(sdp, leaf_bh, ea_hdr, ea_hdr_prev, 0, 0); } memset(ea_name, 0, sizeof(ea_name)); strncpy(ea_name, (char *)ea_hdr + sizeof(struct gfs2_ea_header), ea_hdr->ea_name_len); if (!GFS2_EATYPE_VALID(ea_hdr->ea_type) && ((ea_hdr_prev) || (!ea_hdr_prev && ea_hdr->ea_type))){ log_err( _("EA (%s) type is invalid (%d > %d).\n"), ea_name, ea_hdr->ea_type, GFS2_EATYPE_LAST); return ask_remove_eattr_entry(sdp, leaf_bh, ea_hdr, ea_hdr_prev, 0, 0); } if (ea_hdr->ea_num_ptrs){ 
uint32_t avail_size;
		int max_ptrs;

		/* Payload bytes per EA data block: block size minus the
		   metadata header */
		avail_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
		/* Data blocks needed for ea_data_len bytes, rounded up */
		max_ptrs = (be32_to_cpu(ea_hdr->ea_data_len)+avail_size-1)/avail_size;

		if (max_ptrs > ea_hdr->ea_num_ptrs){
			log_err( _("EA (%s) has incorrect number of pointers.\n"), ea_name);
			log_err( _(" Required: %d\n Reported: %d\n"),
				 max_ptrs, ea_hdr->ea_num_ptrs);
			return ask_remove_eattr_entry(sdp, leaf_bh, ea_hdr,
						      ea_hdr_prev, 0, 0);
		} else {
			log_debug( _(" Pointers Required: %d\n Pointers Reported: %d\n"),
				  max_ptrs, ea_hdr->ea_num_ptrs);
		}
	}
	return 0;
}

/* check_eattr_extentry - check one data pointer of an extended attribute
 *
 * @ea_ptr: big-endian block pointer taken from the EA header
 *
 * If the pointed-to block is not typed gfs2_meta_eattr the whole entry
 * is removed without asking.
 *
 * Returns: 0 if the pointer is fine, 1 if the entry was removed,
 * -1 if remove_eattr_entry() reports failure
 */
static int check_eattr_extentry(struct gfs2_inode *ip, uint64_t *ea_ptr,
				struct gfs2_buffer_head *leaf_bh,
				struct gfs2_ea_header *ea_hdr,
				struct gfs2_ea_header *ea_hdr_prev,
				void *private)
{
	uint8_t q;
	struct gfs2_sbd *sdp = ip->i_sbd;

	q = block_type(be64_to_cpu(*ea_ptr));
	if (q != gfs2_meta_eattr) {
		if (remove_eattr_entry(sdp, leaf_bh, ea_hdr, ea_hdr_prev)){
			stack;
			return -1;
		}
		return 1;
	}
	return 0;
}

/* Go over all inodes with extended attributes and verify the EAs are
 * valid.
 *
 * Walks sdp->eattr_blocks; every listed block that is a dinode has its EA
 * tree checked with the check_eattr_* callbacks defined above.
 *
 * Returns: FSCK_OK, or FSCK_ERROR if check_inode_eattr() fails
 */
int pass1c(struct gfs2_sbd *sdp)
{
	uint64_t block_no = 0;
	struct gfs2_buffer_head *bh;
	struct gfs2_inode *ip = NULL;
	struct metawalk_fxns pass1c_fxns = { 0 };
	int error = 0;
	osi_list_t *tmp, *x;
	struct special_blocks *ea_block;

	pass1c_fxns.check_eattr_indir = &check_eattr_indir;
	pass1c_fxns.check_eattr_leaf = &check_eattr_leaf;
	pass1c_fxns.check_eattr_entry = &check_eattr_entry;
	pass1c_fxns.check_eattr_extentry = &check_eattr_extentry;
	pass1c_fxns.private = NULL;

	log_info( _("Looking for inodes containing ea blocks...\n"));
	/* The "safe" iterator is used because entries are cleared from the
	   list inside the loop (gfs2_special_clear) */
	osi_list_foreach_safe(tmp, &sdp->eattr_blocks.list, x) {
		ea_block = osi_list_entry(tmp, struct special_blocks, list);
		block_no = ea_block->block;
		warm_fuzzy_stuff(block_no);

		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
			return FSCK_OK;
		bh = bread(sdp, block_no);
		if (!gfs2_check_meta(bh, GFS2_METATYPE_DI)) { /* if a dinode */
			log_info( _("EA in inode %llu (0x%llx)\n"),
				 (unsigned long long)block_no, 
(unsigned long long)block_no);
			gfs2_special_clear(&sdp->eattr_blocks, block_no);
			ip = fsck_inode_get(sdp, bh);
			/* The inode now owns bh */
			ip->bh_owned = 1;

			log_debug( _("Found eattr at %llu (0x%llx)\n"),
				  (unsigned long long)ip->i_di.di_eattr,
				  (unsigned long long)ip->i_di.di_eattr);
			/* FIXME: Handle walking the eattr here */
			error = check_inode_eattr(ip, &pass1c_fxns);
			if (error < 0) {
				stack;
				/* NOTE(review): bh is released here but ip is
				   not put on this path - looks like the
				   in-core inode is leaked; confirm. */
				brelse(bh);
				return FSCK_ERROR;
			}

			fsck_inode_put(&ip); /* dinode_out, brelse, free */
		} else {
			brelse(bh);
		}
	}
	return FSCK_OK;
}

/* NOTE(review): the next physical line is a tar member header followed by the
   flattened preprocessor lines of pass2.c; it is kept verbatim. */
gfs2-utils/gfs2/fsck/pass2.c0000664000175000017500000017656212171730231014530 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include #define _(String) gettext(String) #include "libgfs2.h" #include "fsck.h" #include "util.h" #include "eattr.h" #include "metawalk.h" #include "link.h" #include "lost_n_found.h" #include "inode_hash.h" #define MAX_FILENAME 256

struct metawalk_fxns pass2_fxns;

/* Callbacks used in this file to delete an inode's extended attributes
   before a directory entry pointing at it is removed */
struct metawalk_fxns delete_eattrs = {
	.check_eattr_indir = delete_eattr_indir,
	.check_eattr_leaf = delete_eattr_leaf,
	.check_eattr_entry = delete_eattr_entry,
	.check_eattr_extentry = delete_eattr_extentry,
};

/* Set children's parent inode in dir_info structure - ext2 does not set * dotdot inode here, but instead in pass3 - should we? 
*/
/* Returns: 0 on success (or when the dir_info entry does not match
 * @child's inum), 1 if the child already has a treewalk parent recorded,
 * -1 if @child is not in the directory tree */
static int set_parent_dir(struct gfs2_sbd *sdp, struct gfs2_inum child,
			  struct gfs2_inum parent)
{
	struct dir_info *di;

	di = dirtree_find(child.no_addr);
	if (!di) {
		log_err( _("Unable to find block %llu (0x%llx"
			   ") in dir_info list\n"),
			(unsigned long long)child.no_addr,
			(unsigned long long)child.no_addr);
		return -1;
	}

	/* Only record the parent when both address and formal inode number
	   match */
	if (di->dinode.no_addr == child.no_addr &&
	    di->dinode.no_formal_ino == child.no_formal_ino) {
		/* A parent is already recorded: report it to the caller */
		if (di->treewalk_parent) {
			log_err( _("Another directory at block %lld (0x%llx) "
				   "already contains this child %lld (0x%llx)"
				   " - checking parent %lld (0x%llx)\n"),
				 (unsigned long long)di->treewalk_parent,
				 (unsigned long long)di->treewalk_parent,
				 (unsigned long long)child.no_addr,
				 (unsigned long long)child.no_addr,
				 (unsigned long long)parent.no_addr,
				 (unsigned long long)parent.no_addr);
			return 1;
		}
		log_debug( _("Child %lld (0x%llx) has parent %lld (0x%llx)\n"),
			   (unsigned long long)child.no_addr,
			   (unsigned long long)child.no_addr,
			   (unsigned long long)parent.no_addr,
			   (unsigned long long)parent.no_addr);
		di->treewalk_parent = parent.no_addr;
	}

	return 0;
}

/* Sets the child's '..' directory inode number in dir_info structure.
 *
 * Returns: 0 on success, -1 if @childblock is not in the directory tree,
 * does not match the dir_info entry, or already has a '..' parent set
 * (the root inode is exempt from the last check) */
static int set_dotdot_dir(struct gfs2_sbd *sdp, uint64_t childblock,
			  struct gfs2_inum parent)
{
	struct dir_info *di;

	di = dirtree_find(childblock);
	if (!di) {
		log_err( _("Unable to find block %"PRIu64" (0x%" PRIx64
			   ") in dir_info tree\n"), childblock, childblock);
		return -1;
	}
	if (di->dinode.no_addr != childblock) {
		log_debug("'..' 
doesn't point to what we found: childblock "
			  "(0x%llx) != dinode (0x%llx)\n",
			  (unsigned long long)childblock,
			  (unsigned long long)di->dinode.no_addr);
		return -1;
	}
	/* Special case for root inode because we set it earlier */
	if (di->dotdot_parent.no_addr &&
	    sdp->md.rooti->i_di.di_num.no_addr != di->dinode.no_addr) {
		/* This should never happen */
		log_crit( _("Dotdot parent already set for block %llu (0x%llx)"
			    "-> %llu (0x%llx)\n"),
			 (unsigned long long)childblock,
			 (unsigned long long)childblock,
			 (unsigned long long)di->dotdot_parent.no_addr,
			 (unsigned long long)di->dotdot_parent.no_addr);
		return -1;
	}
	log_debug("Setting '..' for directory block (0x%llx) to parent "
		  "(0x%llx)\n", (unsigned long long)childblock,
		  (unsigned long long)parent.no_addr);
	di->dotdot_parent.no_addr = parent.no_addr;
	di->dotdot_parent.no_formal_ino = parent.no_formal_ino;
	return 0;
}

/* EA indirect-block callback: reads the block into *bh with no checks.
 * Returns: always 0 */
static int check_eattr_indir(struct gfs2_inode *ip, uint64_t block,
			     uint64_t parent, struct gfs2_buffer_head **bh,
			     void *private)
{
	*bh = bread(ip->i_sbd, block);
	return 0;
}

/* EA leaf-block callback: reads the block into *bh with no checks.
 * Returns: always 0 */
static int check_eattr_leaf(struct gfs2_inode *ip, uint64_t block,
			    uint64_t parent, struct gfs2_buffer_head **bh,
			    void *private)
{
	*bh = bread(ip->i_sbd, block);
	return 0;
}

/* de_type_string - printable name for a directory entry type
 *
 * Returns: the table entry for @de_type when it is below 15, otherwise
 * "invalid"
 */
static const char *de_type_string(uint8_t de_type)
{
	/* Indexed by the dirent type value; unused slots read "invalid" */
	const char *de_types[15] = {"unknown", "fifo", "chrdev", "invalid",
				    "directory", "invalid", "blkdev", "invalid",
				    "file", "invalid", "symlink", "invalid",
				    "socket", "invalid", "wht"};
	if (de_type < 15)
		return de_types[de_type];
	return de_types[3]; /* invalid */
}

/* check_file_type - compare a dirent's type with the inode's block type
 *
 * @de_type: type stored in the directory entry
 * @blk_type: block type recorded for the inode
 * @gfs1: non-zero selects the GFS_FILE_* constants instead of DT_*
 *
 * Returns: 0 if the types agree, 1 if they conflict, -1 if @blk_type is
 * not one of the inode types handled here
 */
static int check_file_type(uint8_t de_type, uint8_t blk_type, int gfs1)
{
	switch(blk_type) {
	case gfs2_inode_dir:
		if (de_type != (gfs1 ? GFS_FILE_DIR : DT_DIR))
			return 1;
		break;
	case gfs2_inode_file:
		if (de_type != (gfs1 ? GFS_FILE_REG : DT_REG))
			return 1;
		break;
	case gfs2_inode_lnk:
		if (de_type != (gfs1 ? GFS_FILE_LNK : DT_LNK))
			return 1;
		break;
	/* A device inode may be either a block or a character device */
	case gfs2_inode_device:
		if ((de_type != (gfs1 ? GFS_FILE_BLK : DT_BLK)) &&
		    (de_type != (gfs1 ? 
GFS_FILE_CHR : DT_CHR)))
			return 1;
		break;
	case gfs2_inode_fifo:
		if (de_type != (gfs1 ? GFS_FILE_FIFO : DT_FIFO))
			return 1;
		break;
	case gfs2_inode_sock:
		if (de_type != (gfs1 ? GFS_FILE_SOCK : DT_SOCK))
			return 1;
		break;
	default:
		log_err( _("Invalid block type\n"));
		return -1;
		break;
	}
	return 0;
}

/* Callbacks used by basic_dentry_checks() to delete the metadata tree and
   extended attributes of an inode marked as containing bad blocks */
struct metawalk_fxns pass2_fxns_delete = {
	.private = NULL,
	.check_metalist = delete_metadata,
	.check_data = delete_data,
	.check_leaf = delete_leaf,
	.check_eattr_indir = delete_eattr_indir,
	.check_eattr_leaf = delete_eattr_leaf,
	.check_eattr_entry = delete_eattr_entry,
	.check_eattr_extentry = delete_eattr_extentry,
};

/* bad_formal_ino - handle mismatches in formal inode number
 *
 * @ip: the directory containing the entry
 * @dent: on-disk dirent inside @bh
 * @entry: inum the dirent refers to
 * @tmp_name: printable name of the entry
 * @q: block type of the inode the entry points to
 * @de: in-core copy of the dirent
 * @bh: buffer holding @dent
 *
 * Returns: 0 if the dirent was repaired or is being kept
 *          1 if the caller should delete the dirent
 */
static int bad_formal_ino(struct gfs2_inode *ip, struct gfs2_dirent *dent,
			  struct gfs2_inum entry, const char *tmp_name,
			  uint8_t q, struct gfs2_dirent *de,
			  struct gfs2_buffer_head *bh)
{
	struct inode_info *ii;
	struct gfs2_inode *child_ip;
	struct gfs2_inum childs_dotdot;
	struct gfs2_sbd *sdp = ip->i_sbd;
	int error;

	/* NOTE(review): ii is dereferenced below without a NULL check;
	   presumably callers only get here when the inode is in the tree -
	   confirm against incr_link_count(). */
	ii = inodetree_find(entry.no_addr);
	log_err( _("Directory entry '%s' pointing to block %llu (0x%llx) in "
		   "directory %llu (0x%llx) has the wrong 'formal' inode "
		   "number.\n"), tmp_name,
		 (unsigned long long)entry.no_addr,
		 (unsigned long long)entry.no_addr,
		 (unsigned long long)ip->i_di.di_num.no_addr,
		 (unsigned long long)ip->i_di.di_num.no_addr);
	log_err( _("The directory entry has %llu (0x%llx) but the inode has "
		   "%llu (0x%llx)\n"),
		 (unsigned long long)entry.no_formal_ino,
		 (unsigned long long)entry.no_formal_ino,
		 (unsigned long long)ii->di_num.no_formal_ino,
		 (unsigned long long)ii->di_num.no_formal_ino);
	/* Non-directories and '..' entries can only be removed, not fixed */
	if (q != gfs2_inode_dir || !strcmp("..", tmp_name)) {
		if (query( _("Remove the corrupt directory entry? (y/n) ")))
			return 1;
		log_err( _("Corrupt directory entry not removed.\n"));
		return 0;
	}
	/* We have a directory pointing to another directory, but the formal inode number still doesn't match. 
If that directory has a '..' pointing back, just fix up the no_formal_ino. */
	child_ip = lgfs2_inode_read(sdp, entry.no_addr);
	error = dir_search(child_ip, "..", 2, NULL, &childs_dotdot);
	if (!error && childs_dotdot.no_addr == ip->i_di.di_num.no_addr) {
		log_err( _("The entry points to another directory with intact "
			   "linkage.\n"));
		if (query( _("Fix the bad directory entry? (y/n) "))) {
			log_err( _("Fixing the corrupt directory entry.\n"));
			/* Copy the inode's formal number into the dirent and
			   write the dirent back into the buffer */
			entry.no_formal_ino = ii->di_num.no_formal_ino;
			de->de_inum.no_formal_ino = entry.no_formal_ino;
			gfs2_dirent_out(de, (char *)dent);
			bmodified(bh);
			incr_link_count(entry, ip, _("fixed reference"));
			set_parent_dir(sdp, entry, ip->i_di.di_num);
		} else {
			log_err( _("Directory entry not fixed.\n"));
		}
	} else {
		if (query( _("Remove the corrupt directory entry? (y/n) "))) {
			inode_put(&child_ip);
			return 1;
		}
		log_err( _("Corrupt directory entry not removed.\n"));
	}
	inode_put(&child_ip);
	return 0;
}

/* hash_table_index - hash table slot a dirent hash belongs to
 *
 * Uses the top di_depth bits of @hash.
 * NOTE(review): the shift count is 32 when di_depth is 0, which is
 * undefined in C; presumably exhash directories always have a non-zero
 * depth - confirm.
 */
static int hash_table_index(uint32_t hash, struct gfs2_inode *ip)
{
	return hash >> (32 - ip->i_di.di_depth);
}

/* hash_table_max - last hash table slot covered by the leaf in @bh
 *
 * @lindex: first hash table index that points to the leaf
 *
 * The leaf covers 2^(di_depth - lf_depth) consecutive slots.
 */
static int hash_table_max(int lindex, struct gfs2_inode *ip,
		   struct gfs2_buffer_head *bh)
{
	struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
	return (1 << (ip->i_di.di_depth - be16_to_cpu(leaf->lf_depth))) +
		lindex - 1;
}

/* check_leaf_depth - check a leaf's lf_depth against its reference count
 *
 * @ip: the directory inode
 * @leaf_no: block number of the leaf
 * @ref_count: number of hash table pointers that reference the leaf
 * @lbh: buffer holding the leaf
 *
 * Returns: 0 if the depth is right or the user declined the fix, 1 if the
 * depth was rewritten, -1 if no depth can match @ref_count
 */
static int check_leaf_depth(struct gfs2_inode *ip, uint64_t leaf_no,
			    int ref_count, struct gfs2_buffer_head *lbh)
{
	struct gfs2_leaf *leaf = (struct gfs2_leaf *)lbh->b_data;
	int cur_depth = be16_to_cpu(leaf->lf_depth);
	int exp_count = 1 << (ip->i_di.di_depth - cur_depth);
	int divisor;
	int factor, correct_depth;

	if (exp_count == ref_count)
		return 0;

	/* factor = floor(log2(ref_count)) */
	factor = 0;
	divisor = ref_count;
	while (divisor > 1) {
		factor++;
		divisor >>= 1;
	}
	if (ip->i_di.di_depth < factor) /* can't be fixed--leaf must be on the wrong dinode. 
*/
		return -1;
	/* Depth at which the leaf would own exactly 2^factor pointers */
	correct_depth = ip->i_di.di_depth - factor;
	if (cur_depth == correct_depth)
		return 0;

	log_err(_("Leaf block %llu (0x%llx) in dinode %llu (0x%llx) has the "
		  "wrong depth: is %d (length %d), should be %d (length "
		  "%d).\n"),
		(unsigned long long)leaf_no, (unsigned long long)leaf_no,
		(unsigned long long)ip->i_di.di_num.no_addr,
		(unsigned long long)ip->i_di.di_num.no_addr,
		cur_depth, ref_count, correct_depth, exp_count);
	if (!query( _("Fix the leaf block? (y/n)"))) {
		log_err( _("The leaf block was not fixed.\n"));
		return 0;
	}

	/* lf_depth is stored big-endian in the leaf header */
	leaf->lf_depth = cpu_to_be16(correct_depth);
	bmodified(lbh);
	log_err( _("The leaf block depth was fixed.\n"));
	return 1;
}

/* wrong_leaf: Deal with a dirent discovered to be on the wrong leaf block
 *
 * @lindex, @lindex_max: hash table index range covered by the leaf in @bh
 * @hash_index: hash table index the dirent's hash maps to
 *
 * Returns: 1 if the dirent is to be removed, 0 if it needs to be kept,
 *          or -1 on error
 */
static int wrong_leaf(struct gfs2_inode *ip, struct gfs2_inum *entry,
		      const char *tmp_name, int lindex, int lindex_max,
		      int hash_index, struct gfs2_buffer_head *bh,
		      struct dir_status *ds, struct gfs2_dirent *dent,
		      struct gfs2_dirent *de, struct gfs2_dirent *prev_de,
		      uint32_t *count, uint8_t q)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_buffer_head *dest_lbh;
	uint64_t planned_leaf, real_leaf;
	int li, dest_ref, error;
	uint64_t *tbl;

	log_err(_("Directory entry '%s' at block %lld (0x%llx) is on the "
		  "wrong leaf block.\n"), tmp_name,
		(unsigned long long)entry->no_addr,
		(unsigned long long)entry->no_addr);
	log_err(_("Leaf index is: 0x%x. The range for this leaf block is "
		  "0x%x - 0x%x\n"), hash_index, lindex, lindex_max);
	if (!query( _("Move the misplaced directory entry to "
		      "a valid leaf block? 
(y/n) "))) {
		log_err( _("Misplaced directory entry not moved.\n"));
		return 0;
	}

	/* check the destination leaf block's depth */
	tbl = get_dir_hash(ip);
	if (tbl == NULL) {
		perror("get_dir_hash");
		return -1;
	}
	/* Leaf the hash table says this dirent's hash belongs to */
	planned_leaf = be64_to_cpu(tbl[hash_index]);
	log_err(_("Moving it from leaf %llu (0x%llx) to %llu (0x%llx)\n"),
		(unsigned long long)be64_to_cpu(tbl[lindex]),
		(unsigned long long)be64_to_cpu(tbl[lindex]),
		(unsigned long long)planned_leaf,
		(unsigned long long)planned_leaf);
	/* Can't trust lf_depth; we have to count */
	/* Counts the first consecutive run of pointers to planned_leaf */
	dest_ref = 0;
	for (li = 0; li < (1 << ip->i_di.di_depth); li++) {
		if (be64_to_cpu(tbl[li]) == planned_leaf)
			dest_ref++;
		else if (dest_ref)
			break;
	}
	dest_lbh = bread(sdp, planned_leaf);
	/* The result is ignored: a declined or impossible fix does not stop
	   the move */
	check_leaf_depth(ip, planned_leaf, dest_ref, dest_lbh);
	brelse(dest_lbh);
	free(tbl);

	/* check if it's already on the correct leaf block */
	error = dir_search(ip, tmp_name, de->de_name_len, NULL, &de->de_inum);
	if (!error) {
		log_err(_("The misplaced directory entry already appears on "
			  "the correct leaf block.\n"));
		log_err( _("The bad duplicate directory entry "
			   "'%s' was cleared.\n"), tmp_name);
		return 1; /* nuke the dent upon return */
	}

	if (dir_add(ip, tmp_name, de->de_name_len, &de->de_inum,
		    de->de_type) == 0) {
		log_err(_("The misplaced directory entry was moved to a "
			  "valid leaf block.\n"));
		/* Re-read the leaf number: dir_add may have split the leaf */
		gfs2_get_leaf_nr(ip, hash_index, &real_leaf);
		if (real_leaf != planned_leaf) {
			log_err(_("The planned leaf was split. The new leaf "
				  "is: %llu (0x%llx). di_blocks=%llu\n"),
				(unsigned long long)real_leaf,
				(unsigned long long)real_leaf,
				(unsigned long long)ip->i_di.di_blocks);
			/* NOTE(review): the new leaf is recorded as
			   gfs2_indir_blk while pad_with_leafblks() uses
			   gfs2_leaf_blk - confirm this is intended. */
			fsck_blockmap_set(ip, real_leaf, _("split leaf"),
					  gfs2_indir_blk);
		}
		/* If the misplaced dirent was supposed to be earlier in the hash table, we need to adjust our counts for the blocks that have already been processed. If it's supposed to appear later, we'll count it has part of our normal processing when we get to that leaf block later on in the hash table. 
*/
		/* Target leaf comes later in the table: it will be counted
		   when that leaf is processed */
		if (hash_index > lindex) {
			log_err(_("Accounting deferred.\n"));
			return 1; /* nuke the dent upon return */
		}
		/* If we get here, it's because we moved a dent to another
		   leaf, but that leaf has already been processed. So we have
		   to nuke the dent from this leaf when we return, but we
		   still need to do the "good dent" accounting. */
		if (de->de_type == (sdp->gfs1 ? GFS_FILE_DIR : DT_DIR)) {
			error = set_parent_dir(sdp, de->de_inum,
					       ip->i_di.di_num);
			if (error > 0)
				/* This is a bit of a kludge, but returning 0
				   in this case causes the caller to go through
				   function set_parent_dir a second time and
				   deal properly with the hard link. */
				return 0;
		}
		error = incr_link_count(*entry, ip,
					_("moved valid reference"));
		/* A positive result sends the entry through the formal inode
		   number repair path */
		if (error > 0 &&
		    bad_formal_ino(ip, dent, *entry, tmp_name, q, de, bh) == 1)
			return 1; /* nuke it */

		/* You cannot do this: (*count)++; The reason is: *count is the count of dentries on the leaf, and we moved the dentry to a previous leaf within the same directory dinode. So the directory counts still get incremented, but not leaf entries. When we called dir_add above, it should have fixed that prev leaf's lf_entries. 
*/
		ds->entry_count++;
		return 1;
	} else {
		log_err(_("Error moving directory entry.\n"));
		return 1; /* nuke it */
	}
}

/* basic_dentry_checks - fundamental checks for directory entries
 *
 * @ip: pointer to the incore inode structure
 * @dent: pointer to the on-disk directory entry inside @bh
 * @entry: pointer to the inum info
 * @tmp_name: user-friendly file name
 * @count: pointer to the entry count
 * @de: pointer to the in-core copy of the directory entry
 * @ds: directory status; its entry_count is updated here
 * @q: set to the block type of the inode the entry points to
 * @bh: buffer holding @dent
 *
 * Returns: 1 means corruption, nuke the dentry, 0 means checks pass,
 *          -1 if check_file_type() rejects the block type
 */
static int basic_dentry_checks(struct gfs2_inode *ip, struct gfs2_dirent *dent,
			       struct gfs2_inum *entry, const char *tmp_name,
			       uint32_t *count, struct gfs2_dirent *de,
			       struct dir_status *ds, uint8_t *q,
			       struct gfs2_buffer_head *bh)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	uint32_t calculated_hash;
	struct gfs2_inode *entry_ip = NULL;
	int error;
	struct inode_info *ii;

	if (!valid_block(ip->i_sbd, entry->no_addr)) {
		log_err( _("Block # referenced by directory entry %s in inode "
			   "%lld (0x%llx) is invalid\n"),
			 tmp_name, (unsigned long long)ip->i_di.di_num.no_addr,
			 (unsigned long long)ip->i_di.di_num.no_addr);
		if (query( _("Clear directory entry to out of range block? "
			    "(y/n) "))) {
			return 1;
		} else {
			log_err( _("Directory entry to out of range block remains\n"));
			/* The kept entry still counts toward the totals */
			(*count)++;
			ds->entry_count++;
			/* can't do this because the block is out of range:
			   incr_link_count(entry); */
			return 0;
		}
	}

	/* The record must be large enough to hold a dirent with this name */
	if (de->de_rec_len < GFS2_DIRENT_SIZE(de->de_name_len)) {
		log_err( _("Dir entry with bad record or name length\n"
			"\tRecord length = %u\n\tName length = %u\n"),
			de->de_rec_len, de->de_name_len);
		if (!query( _("Clear the directory entry? 
(y/n) "))) {
			log_err( _("Directory entry not fixed.\n"));
			return 0;
		}
		/* Note: this marks the containing directory's own block
		   (ip), not the entry's target, as gfs2_inode_invalid */
		fsck_blockmap_set(ip, ip->i_di.di_num.no_addr,
				  _("corrupt directory entry"),
				  gfs2_inode_invalid);
		log_err( _("Bad directory entry deleted.\n"));
		return 1;
	}

	/* Recompute the name hash and compare it with the stored one */
	calculated_hash = gfs2_disk_hash(tmp_name, de->de_name_len);
	if (de->de_hash != calculated_hash){
	        log_err( _("Dir entry with bad hash or name length\n"
			   "\tHash found = %u (0x%x)\n"
			   "\tFilename = %s\n"),
			 de->de_hash, de->de_hash, tmp_name);
		log_err( _("\tName length found = %u\n"
			   "\tHash expected = %u (0x%x)\n"),
			 de->de_name_len, calculated_hash, calculated_hash);
		if (!query( _("Fix directory hash for %s? (y/n) "),
			   tmp_name)) {
			log_err( _("Directory entry hash for %s not "
				   "fixed.\n"), tmp_name);
			return 0;
		}
		/* Store the corrected hash and write the dirent back */
		de->de_hash = calculated_hash;
		gfs2_dirent_out(de, (char *)dent);
		bmodified(bh);
		log_err( _("Directory entry hash for %s fixed.\n"),
			 tmp_name);
	}

	*q = block_type(entry->no_addr);
	/* Get the status of the directory inode */
	/**
	 * 1. Blocks marked "invalid" were invalidated due to duplicate
	 * block references. Pass1b should have already taken care of deleting
	 * their metadata, so here we only need to delete the directory entries
	 * pointing to them. We delete the metadata in pass1b because we need
	 * to eliminate the inode referencing the duplicate-referenced block
	 * from the list of candidates to keep. So we have a delete-as-we-go
	 * policy.
	 *
	 * 2. Blocks marked "bad" need to have their entire
	 * metadata tree deleted.
	*/
	if (*q == gfs2_inode_invalid || *q == gfs2_bad_block) {
		/* This entry's inode has bad blocks in it */

		/* Handle bad blocks */
		log_err( _("Found directory entry '%s' pointing to invalid "
			   "block %lld (0x%llx)\n"), tmp_name,
			 (unsigned long long)entry->no_addr,
			 (unsigned long long)entry->no_addr);

		if (!query( _("Delete inode containing bad blocks? 
(y/n)"))) { log_warn( _("Entry to inode containing bad blocks remains\n")); return 0; } if (*q == gfs2_bad_block) { if (ip->i_di.di_num.no_addr == entry->no_addr) entry_ip = ip; else entry_ip = fsck_load_inode(sdp, entry->no_addr); if (ip->i_di.di_eattr) { check_inode_eattr(entry_ip, &pass2_fxns_delete); } check_metatree(entry_ip, &pass2_fxns_delete); if (entry_ip != ip) fsck_inode_put(&entry_ip); } fsck_blockmap_set(ip, entry->no_addr, _("bad directory entry"), gfs2_block_free); log_err( _("Inode %lld (0x%llx) was deleted.\n"), (unsigned long long)entry->no_addr, (unsigned long long)entry->no_addr); return 1; } if (*q < gfs2_inode_dir || *q > gfs2_inode_sock) { log_err( _("Directory entry '%s' referencing inode %llu " "(0x%llx) in dir inode %llu (0x%llx) block type " "%d: %s.\n"), tmp_name, (unsigned long long)entry->no_addr, (unsigned long long)entry->no_addr, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, *q, *q == gfs2_inode_invalid ? _("was previously marked invalid") : _("was deleted or is not an inode")); if (!query( _("Clear directory entry to non-inode block? " "(y/n) "))) { log_err( _("Directory entry to non-inode block remains\n")); return 0; } /* Don't decrement the link here: Here in pass2, we increment only when we know it's okay. decr_link_count(ip->i_di.di_num.no_addr); */ /* If it was previously marked invalid (i.e. known to be bad, not just a free block, etc.) then the temptation would be to delete any metadata it holds. The trouble is: if it's invalid, we may or _may_not_ have traversed its metadata tree, and therefore may or may not have marked the blocks it points to as a metadata type, or as a duplicate. If there is really a duplicate reference, but we didn't process the metadata tree because it's invalid, some other inode has a reference to the metadata block, in which case freeing it would do more harm than good. 
IOW we cannot count on "delete_block_if_notdup" knowing whether it's really a duplicate block if we never traversed the metadata tree for the invalid inode. */
		return 1;
	}

	/* Compare the dirent's type with the inode's block type */
	error = check_file_type(de->de_type, *q, sdp->gfs1);
	if (error < 0) {
		log_err( _("Error: directory entry type is "
			   "incompatible with block type at block %lld "
			   "(0x%llx) in directory inode %llu (0x%llx).\n"),
			 (unsigned long long)entry->no_addr,
			 (unsigned long long)entry->no_addr,
			 (unsigned long long)ip->i_di.di_num.no_addr,
			 (unsigned long long)ip->i_di.di_num.no_addr);
		log_err( _("Directory entry type is %d, block type is %d.\n"),
			 de->de_type, *q);
		stack;
		return -1;
	}
	if (error > 0) {
		log_err( _("Type '%s' in dir entry (%s, %llu/0x%llx) conflicts"
			 " with type '%s' in dinode. (Dir entry is stale.)\n"),
			 de_type_string(de->de_type), tmp_name,
			 (unsigned long long)entry->no_addr,
			 (unsigned long long)entry->no_addr,
			 block_type_string(*q));
		if (!query( _("Clear stale directory entry? (y/n) "))) {
			log_err( _("Stale directory entry remains\n"));
			return 0;
		}
		/* Delete the target inode's extended attributes before the
		   caller removes the entry */
		if (ip->i_di.di_num.no_addr == entry->no_addr)
			entry_ip = ip;
		else
			entry_ip = fsck_load_inode(sdp, entry->no_addr);
		check_inode_eattr(entry_ip, &delete_eattrs);
		if (entry_ip != ip)
			fsck_inode_put(&entry_ip);
		return 1;
	}
	/* We need to verify the formal inode number matches. If it doesn't, it needs to be deleted. 
*/
	/* An inode missing from the inode tree is not treated as a mismatch */
	ii = inodetree_find(entry->no_addr);
	if (ii && ii->di_num.no_formal_ino != entry->no_formal_ino) {
		log_err( _("Directory entry '%s' pointing to block %llu "
			   "(0x%llx) in directory %llu (0x%llx) has the "
			   "wrong 'formal' inode number.\n"), tmp_name,
			 (unsigned long long)entry->no_addr,
			 (unsigned long long)entry->no_addr,
			 (unsigned long long)ip->i_di.di_num.no_addr,
			 (unsigned long long)ip->i_di.di_num.no_addr);
		log_err( _("The directory entry has %llu (0x%llx) but the "
			   "inode has %llu (0x%llx)\n"),
			 (unsigned long long)entry->no_formal_ino,
			 (unsigned long long)entry->no_formal_ino,
			 (unsigned long long)ii->di_num.no_formal_ino,
			 (unsigned long long)ii->di_num.no_formal_ino);
		return 1;
	}
	return 0;
}

/* check_dentry - check one directory entry
 *
 * @ip: the directory being checked
 * @dent: on-disk dirent inside @bh
 * @prev_de: the dirent preceding @dent (passed to dirent2_del)
 * @bh: buffer holding @dent
 * @filename: the entry's name as stored on disk
 * @count: count of entries in the current leaf; incremented for kept entries
 * @lindex: hash table index of the leaf being processed
 * @priv: struct dir_status for this directory
 *
 * Returns: 0 if the entry is kept, 1 if it was deleted, -1 on error
 *
 * FIXME: should maybe refactor this a bit - but need to deal with
 * FIXMEs internally first */
static int check_dentry(struct gfs2_inode *ip, struct gfs2_dirent *dent,
			struct gfs2_dirent *prev_de,
			struct gfs2_buffer_head *bh, char *filename,
			uint32_t *count, int lindex, void *priv)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	uint8_t q = 0;
	char tmp_name[MAX_FILENAME];
	struct gfs2_inum entry;
	struct dir_status *ds = (struct dir_status *) priv;
	int error;
	struct gfs2_inode *entry_ip = NULL;
	struct gfs2_dirent dentry, *de;
	int hash_index; /* index into the hash table based on the hash */
	int lindex_max; /* largest acceptable hash table index for hash */

	/* Work on an in-core copy of the on-disk dirent */
	memset(&dentry, 0, sizeof(struct gfs2_dirent));
	gfs2_dirent_in(&dentry, (char *)dent);
	de = &dentry;

	entry.no_addr = de->de_inum.no_addr;
	entry.no_formal_ino = de->de_inum.no_formal_ino;

	/* Start of checks */
	/* Build a NUL-terminated copy of the name, truncated if necessary */
	memset(tmp_name, 0, MAX_FILENAME);
	if (de->de_name_len < MAX_FILENAME)
		strncpy(tmp_name, filename, de->de_name_len);
	else
		strncpy(tmp_name, filename, MAX_FILENAME - 1);

	/* Any non-zero result, including -1, deletes the entry */
	error = basic_dentry_checks(ip, dent, &entry, tmp_name, count, de,
				    ds, &q, bh);
	if (error)
		goto nuke_dentry;

	if (!strcmp(".", tmp_name)) {
		log_debug( _("Found . 
dentry in directory %lld (0x%llx)\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); if (ds->dotdir) { log_err( _("Already found '.' entry in directory %llu" " (0x%llx)\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); if (!query( _("Clear duplicate '.' entry? (y/n) "))) { log_err( _("Duplicate '.' entry remains\n")); /* FIXME: Should we continue on here * and check the rest of the '.' entry? */ goto dentry_is_valid; } if (ip->i_di.di_num.no_addr == entry.no_addr) entry_ip = ip; else entry_ip = fsck_load_inode(sdp, entry.no_addr); check_inode_eattr(entry_ip, &delete_eattrs); if (entry_ip != ip) fsck_inode_put(&entry_ip); goto nuke_dentry; } /* GFS2 does not rely on '.' being in a certain * location */ /* check that '.' refers to this inode */ if (entry.no_addr != ip->i_di.di_num.no_addr) { log_err( _("'.' entry's value incorrect in directory %llu" " (0x%llx). Points to %llu" " (0x%llx) when it should point to %llu" " (0x%llx).\n"), (unsigned long long)entry.no_addr, (unsigned long long)entry.no_addr, (unsigned long long)entry.no_addr, (unsigned long long)entry.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); if (!query( _("Remove '.' reference? (y/n) "))) { log_err( _("Invalid '.' reference remains\n")); /* Not setting ds->dotdir here since * this '.' entry is invalid */ goto dentry_is_valid; } if (ip->i_di.di_num.no_addr == entry.no_addr) entry_ip = ip; else entry_ip = fsck_load_inode(sdp, entry.no_addr); check_inode_eattr(entry_ip, &delete_eattrs); if (entry_ip != ip) fsck_inode_put(&entry_ip); goto nuke_dentry; } ds->dotdir = 1; goto dentry_is_valid; } if (!strcmp("..", tmp_name)) { log_debug( _("Found '..' dentry in directory %lld (0x%llx)\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); if (ds->dotdotdir) { log_err( _("Already had a '..' 
entry in directory %llu" "(0x%llx)\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); if (!query( _("Clear duplicate '..' entry? (y/n) "))) { log_err( _("Duplicate '..' entry remains\n")); /* FIXME: Should we continue on here * and check the rest of the '..' * entry? */ goto dentry_is_valid; } if (ip->i_di.di_num.no_addr == entry.no_addr) entry_ip = ip; else entry_ip = fsck_load_inode(sdp, entry.no_addr); check_inode_eattr(entry_ip, &delete_eattrs); if (entry_ip != ip) fsck_inode_put(&entry_ip); goto nuke_dentry; } if (q != gfs2_inode_dir) { log_err( _("Found '..' entry in directory %llu (0x%llx) " "pointing to something that's not a directory"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); if (!query( _("Clear bad '..' directory entry? (y/n) "))) { log_err( _("Bad '..' directory entry remains\n")); goto dentry_is_valid; } if (ip->i_di.di_num.no_addr == entry.no_addr) entry_ip = ip; else entry_ip = fsck_load_inode(sdp, entry.no_addr); check_inode_eattr(entry_ip, &delete_eattrs); if (entry_ip != ip) fsck_inode_put(&entry_ip); goto nuke_dentry; } /* GFS2 does not rely on '..' 
being in a certain location */

		/* Add the address this entry is pointing to
		 * to this inode's dotdot_parent in
		 * dir_info */
		if (set_dotdot_dir(sdp, ip->i_di.di_num.no_addr, entry)) {
			stack;
			return -1;
		}

		/* Remember that a valid '..' has been seen */
		ds->dotdotdir = 1;
		goto dentry_is_valid;
	}
	/* If this is an exhash directory, make sure the dentries in the leaf
	   block have a hash table index that fits */
	if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
		hash_index = hash_table_index(de->de_hash, ip);
		lindex_max = hash_table_max(lindex, ip, bh);
		if (hash_index < lindex || hash_index > lindex_max) {
			int nuke_dent;

			/* NOTE(review): wrong_leaf() can return -1, which
			   also takes the nuke path here - confirm that is
			   intended. */
			nuke_dent = wrong_leaf(ip, &entry, tmp_name, lindex,
					       lindex_max, hash_index, bh, ds,
					       dent, de, prev_de, count, q);
			if (nuke_dent)
				goto nuke_dentry;
		}
	}

	/* After this point we're only concerned with directories */
	if (q != gfs2_inode_dir) {
		log_debug( _("Found non-dir inode dentry pointing to %lld "
			     "(0x%llx)\n"),
			   (unsigned long long)entry.no_addr,
			   (unsigned long long)entry.no_addr);
		goto dentry_is_valid;
	}

	/*log_debug( _("Found plain directory dentry\n"));*/
	/* A positive result means the child already has a parent recorded */
	error = set_parent_dir(sdp, entry, ip->i_di.di_num);
	if (error > 0) {
		log_err( _("%s: Hard link to block %llu (0x%llx"
			   ") detected.\n"), tmp_name,
			(unsigned long long)entry.no_addr,
			(unsigned long long)entry.no_addr);

		if (query( _("Clear hard link to directory? 
(y/n) "))) goto nuke_dentry; else { log_err( _("Hard link to directory remains\n")); goto dentry_is_valid; } } else if (error < 0) { stack; return -1; } dentry_is_valid: /* This directory inode links to this inode via this dentry */ error = incr_link_count(entry, ip, _("valid reference")); if (error > 0 && bad_formal_ino(ip, dent, entry, tmp_name, q, de, bh) == 1) goto nuke_dentry; (*count)++; ds->entry_count++; /* End of checks */ return 0; nuke_dentry: dirent2_del(ip, bh, prev_de, dent); log_err( _("Bad directory entry '%s' cleared.\n"), tmp_name); return 1; } /* pad_with_leafblks - pad a hash table with pointers to new leaf blocks * * @ip: pointer to the dinode structure * @tbl: pointer to the hash table in memory * @lindex: index location within the hash table to pad * @len: number of pointers to be padded */ static void pad_with_leafblks(struct gfs2_inode *ip, uint64_t *tbl, int lindex, int len) { int new_len, i; uint32_t proper_start = lindex; uint64_t new_leaf_blk; log_err(_("Padding inode %llu (0x%llx) hash table at offset %d (0x%x) " "for %d pointers.\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, lindex, lindex, len); while (len) { new_len = 1; /* Determine the next factor of 2 down from extras. We can't just write out a leaf block on a power-of-two boundary. We also need to make sure it has a length that will ensure a "proper start" block as well. */ while ((new_len << 1) <= len) { /* Translation: If doubling the size of the new leaf will make its start boundary wrong, we have to settle for a smaller length (and iterate more). 
*/ proper_start = (lindex & ~((new_len << 1) - 1)); if (lindex != proper_start) break; new_len <<= 1; } write_new_leaf(ip, lindex, new_len, "after", &new_leaf_blk); log_err(_("New leaf block was allocated at %llu (0x%llx) for " "index %d (0x%x), length %d\n"), (unsigned long long)new_leaf_blk, (unsigned long long)new_leaf_blk, lindex, lindex, new_len); fsck_blockmap_set(ip, new_leaf_blk, _("pad leaf"), gfs2_leaf_blk); /* Fix the hash table in memory to have the new leaf */ for (i = 0; i < new_len; i++) tbl[lindex + i] = cpu_to_be64(new_leaf_blk); len -= new_len; lindex += new_len; } } /* lost_leaf - repair a leaf block that's on the wrong directory inode * * If the correct index is less than the starting index, we have a problem. * Since we process the index sequentially, the previous index has already * been processed, fixed, and is now correct. But this leaf wants to overwrite * a previously written good leaf. The only thing we can do is move all the * directory entries to lost+found so we don't overwrite the good leaf. Then * we need to pad the gap we leave. */ static int lost_leaf(struct gfs2_inode *ip, uint64_t *tbl, uint64_t leafno, int ref_count, int lindex, struct gfs2_buffer_head *bh) { char *filename; char *bh_end = bh->b_data + ip->i_sbd->bsize; struct gfs2_dirent de, *dent; int error; log_err(_("Leaf block %llu (0x%llx) seems to be out of place and its " "contents need to be moved to lost+found.\n"), (unsigned long long)leafno, (unsigned long long)leafno); if (!query( _("Attempt to fix it? 
(y/n) "))) { log_err( _("Directory leaf was not fixed.\n")); return 0; } make_sure_lf_exists(ip); dent = (struct gfs2_dirent *)(bh->b_data + sizeof(struct gfs2_leaf)); while (1) { char tmp_name[PATH_MAX]; memset(&de, 0, sizeof(struct gfs2_dirent)); gfs2_dirent_in(&de, (char *)dent); filename = (char *)dent + sizeof(struct gfs2_dirent); memset(tmp_name, 0, sizeof(tmp_name)); if (de.de_name_len > sizeof(filename)) { log_debug(_("Encountered bad filename length; " "stopped processing.\n")); break; } memcpy(tmp_name, filename, de.de_name_len); if ((de.de_name_len == 1 && filename[0] == '.')) { log_debug(_("Skipping entry '.'\n")); } else if (de.de_name_len == 2 && filename[0] == '.' && filename[1] == '.') { log_debug(_("Skipping entry '..'\n")); } else if (!de.de_inum.no_formal_ino) { /* sentinel */ log_debug(_("Skipping sentinel '%s'\n"), tmp_name); } else { uint32_t count; struct dir_status ds = {0}; uint8_t q = 0; error = basic_dentry_checks(ip, dent, &de.de_inum, tmp_name, &count, &de, &ds, &q, bh); if (error) { log_err(_("Not relocating corrupt entry " "\"%s\".\n"), tmp_name); } else { error = dir_add(lf_dip, filename, de.de_name_len, &de.de_inum, de.de_type); if (error && error != -EEXIST) { log_err(_("Error %d encountered while " "trying to relocate \"%s\" " "to lost+found.\n"), error, tmp_name); return error; } /* This inode is linked from lost+found */ incr_link_count(de.de_inum, lf_dip, _("from lost+found")); /* If it's a directory, lost+found is back-linked to it via .. */ if (q == gfs2_inode_dir) incr_link_count(lf_dip->i_di.di_num, NULL, _("to lost+found")); log_err(_("Relocated \"%s\", block %llu " "(0x%llx) to lost+found.\n"), tmp_name, (unsigned long long)de.de_inum.no_addr, (unsigned long long)de.de_inum.no_addr); } } if ((char *)dent + de.de_rec_len >= bh_end) break; dent = (struct gfs2_dirent *)((char *)dent + de.de_rec_len); } log_err(_("Directory entries from misplaced leaf block were relocated " "to lost+found.\n")); /* Free the lost leaf. 
*/ fsck_blockmap_set(ip, leafno, _("lost leaf"), gfs2_block_free); ip->i_di.di_blocks--; bmodified(ip->i_bh); /* Now we have to deal with the bad hash table entries pointing to the misplaced leaf block. But we can't just fill the gap with a single leaf. We have to write on nice power-of-two boundaries, and we have to pad out any extra pointers. */ pad_with_leafblks(ip, tbl, lindex, ref_count); return 1; } static int basic_check_dentry(struct gfs2_inode *ip, struct gfs2_dirent *dent, struct gfs2_dirent *prev_de, struct gfs2_buffer_head *bh, char *filename, uint32_t *count, int lindex, void *priv) { uint8_t q = 0; char tmp_name[MAX_FILENAME]; struct gfs2_inum entry; struct dir_status *ds = (struct dir_status *) priv; struct gfs2_dirent dentry, *de; int error; memset(&dentry, 0, sizeof(struct gfs2_dirent)); gfs2_dirent_in(&dentry, (char *)dent); de = &dentry; entry.no_addr = de->de_inum.no_addr; entry.no_formal_ino = de->de_inum.no_formal_ino; /* Start of checks */ memset(tmp_name, 0, MAX_FILENAME); if (de->de_name_len < MAX_FILENAME) strncpy(tmp_name, filename, de->de_name_len); else strncpy(tmp_name, filename, MAX_FILENAME - 1); error = basic_dentry_checks(ip, dent, &entry, tmp_name, count, de, ds, &q, bh); if (error) { dirent2_del(ip, bh, prev_de, dent); log_err( _("Bad directory entry '%s' cleared.\n"), tmp_name); return 1; } else { (*count)++; return 0; } } static int pass2_repair_leaf(struct gfs2_inode *ip, uint64_t *leaf_no, int lindex, int ref_count, const char *msg, void *private) { return repair_leaf(ip, leaf_no, lindex, ref_count, msg, 1); } /* The purpose of leafck_fxns is to provide a means for function fix_hashtable * to do basic sanity checks on leaf blocks before manipulating them, for * example, splitting them. If they're corrupt, splitting them or trying to * move their contents can cause a segfault. We can't really use the standard * pass2_fxns because that will do things we don't want. For example, it will * find '.' and '..' 
and increment the directory link count, which would be
 * done a second time when the dirent is really checked in pass2_fxns.
 * We don't want it to do the "wrong leaf" thing, or set_parent_dir either.
 * We just want a basic sanity check on pointers and lengths.
 *
 * Only three callbacks are set: the leaf depth check, the basic dentry
 * check and the leaf repair hook. All other members are left at their
 * zero-initialized defaults.
 */
struct metawalk_fxns leafck_fxns = {
	.check_leaf_depth = check_leaf_depth,
	.check_dentry = basic_check_dentry,
	.repair_leaf = pass2_repair_leaf,
};

/* fix_hashtable - fix a corrupt hash table
 *
 * The main intent of this function is to sort out hash table problems.
 * That is, it needs to determine if leaf blocks are in the wrong place,
 * if the count of pointers is wrong, and if there are extra pointers.
 * Everything should be placed on correct power-of-two boundaries appropriate
 * to their leaf depth, and extra pointers should be correctly padded with new
 * leaf blocks.
 *
 * @ip: the directory dinode structure pointer
 * @tbl: hash table that's already read into memory
 * @hsize: hash table size, as dictated by the dinode's di_depth
 * @leafblk: the leaf block number that appears at this lindex in the tbl
 * @lindex: leaf index that has a problem
 * @proper_start: where this leaf's pointers should start, as far as the
 *                hash table is concerned (sight unseen; trusting the leaf
 *                really belongs here).
 * @len: count of pointers in the hash table to this leafblk
 * @proper_len: pointer to return the proper number of pointers, as the kernel
 *              calculates it, based on the leaf depth.
 * @factor: the proper depth, given this number of pointers (rounded down).
* * Returns: 0 - no changes made, or X if changes were made */ static int fix_hashtable(struct gfs2_inode *ip, uint64_t *tbl, unsigned hsize, uint64_t leafblk, int lindex, uint32_t proper_start, int len, int *proper_len, int factor) { struct gfs2_buffer_head *lbh; struct gfs2_leaf leaf; struct gfs2_dirent dentry, *de; int changes = 0, error, i, extras, hash_index; uint64_t new_leaf_blk; uint64_t leaf_no; uint32_t leaf_proper_start; *proper_len = len; log_err(_("Dinode %llu (0x%llx) has a hash table error at index " "0x%x, length 0x%x: leaf block %llu (0x%llx)\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, lindex, len, (unsigned long long)leafblk, (unsigned long long)leafblk); if (!query( _("Fix the hash table? (y/n) "))) { log_err(_("Hash table not fixed.\n")); return 0; } memset(&leaf, 0, sizeof(leaf)); leaf_no = leafblk; error = check_leaf(ip, lindex, &leafck_fxns, &leaf_no, &leaf, &len); if (error) { log_debug("Leaf repaired while fixing the hash table.\n"); error = 0; } lbh = bread(ip->i_sbd, leafblk); /* If the leaf's depth is out of range for this dinode, it's obviously attached to the wrong dinode. Move the dirents to lost+found. */ if (leaf.lf_depth > ip->i_di.di_depth) { log_err(_("This leaf block's depth (%d) is too big for this " "dinode's depth (%d)\n"), leaf.lf_depth, ip->i_di.di_depth); error = lost_leaf(ip, tbl, leafblk, len, lindex, lbh); brelse(lbh); return error; } memset(&dentry, 0, sizeof(struct gfs2_dirent)); de = (struct gfs2_dirent *)(lbh->b_data + sizeof(struct gfs2_leaf)); gfs2_dirent_in(&dentry, (char *)de); /* If this is an empty leaf, we can just delete it and pad. 
*/ if ((dentry.de_rec_len == cpu_to_be16(ip->i_sbd->bsize - sizeof(struct gfs2_leaf))) && (dentry.de_inum.no_formal_ino == 0)) { brelse(lbh); gfs2_free_block(ip->i_sbd, leafblk); log_err(_("Out of place leaf block %llu (0x%llx) had no " "entries, so it was deleted.\n"), (unsigned long long)leafblk, (unsigned long long)leafblk); pad_with_leafblks(ip, tbl, lindex, len); log_err(_("Reprocessing index 0x%x (case 1).\n"), lindex); return 1; } /* Calculate the proper number of pointers based on the leaf depth. */ *proper_len = 1 << (ip->i_di.di_depth - leaf.lf_depth); /* Look at the first dirent and check its hash value to see if it's at the proper starting offset. */ hash_index = hash_table_index(dentry.de_hash, ip); /* Need to use len here, not *proper_len because the leaf block may be valid within the range, but starts too soon in the hash table. */ if (hash_index < lindex || hash_index > lindex + len) { log_err(_("This leaf block has hash index %d, which is out of " "bounds for where it appears in the hash table " "(%d - %d)\n"), hash_index, lindex, lindex + *proper_len); error = lost_leaf(ip, tbl, leafblk, len, lindex, lbh); brelse(lbh); return error; } /* Now figure out where this leaf should start, and pad any pointers up to that point with new leaf blocks. */ leaf_proper_start = (hash_index & ~(*proper_len - 1)); if (lindex < leaf_proper_start) { log_err(_("Leaf pointers start at %d (0x%x), should be %d " "(%x).\n"), lindex, lindex, leaf_proper_start, leaf_proper_start); pad_with_leafblks(ip, tbl, lindex, leaf_proper_start - lindex); brelse(lbh); return 1; /* reprocess the starting lindex */ } /* If the proper start according to the leaf's hash index is later than the proper start according to the hash table, it's once again lost and we have to relocate it. The same applies if the leaf's hash index is prior to the proper state, but the leaf is already at its maximum depth. 
*/ if ((leaf_proper_start < proper_start) || ((*proper_len > len || lindex > leaf_proper_start) && leaf.lf_depth == ip->i_di.di_depth)) { log_err(_("Leaf block should start at 0x%x, but it appears at " "0x%x in the hash table.\n"), leaf_proper_start, proper_start); error = lost_leaf(ip, tbl, leafblk, len, lindex, lbh); brelse(lbh); return error; } /* If we SHOULD have more pointers than we do, we can solve the problem by splitting the block to a lower depth. Then we may have the right number of pointers. If the leaf block pointers start later than they should, we can split the leaf to give it a smaller footprint in the hash table. */ if ((*proper_len > len || lindex > leaf_proper_start) && ip->i_di.di_depth > leaf.lf_depth) { log_err(_("For depth %d, length %d, the proper start is: " "0x%x.\n"), factor, len, proper_start); changes++; new_leaf_blk = find_free_blk(ip->i_sbd); dir_split_leaf(ip, lindex, leafblk, lbh); /* re-read the leaf to pick up dir_split_leaf's changes */ gfs2_leaf_in(&leaf, lbh); *proper_len = 1 << (ip->i_di.di_depth - leaf.lf_depth); log_err(_("Leaf block %llu (0x%llx) was split from length " "%d to %d\n"), (unsigned long long)leafblk, (unsigned long long)leafblk, len, *proper_len); if (*proper_len < 0) { log_err(_("Programming error: proper_len=%d, " "di_depth = %d, lf_depth = %d.\n"), *proper_len, ip->i_di.di_depth, leaf.lf_depth); exit(FSCK_ERROR); } log_err(_("New split-off leaf block was allocated at %lld " "(0x%llx) for index %d (0x%x)\n"), (unsigned long long)new_leaf_blk, (unsigned long long)new_leaf_blk, lindex, lindex); fsck_blockmap_set(ip, new_leaf_blk, _("split leaf"), gfs2_leaf_blk); log_err(_("Hash table repaired.\n")); /* Fix up the hash table in memory to include the new leaf */ for (i = 0; i < *proper_len; i++) tbl[lindex + i] = cpu_to_be64(new_leaf_blk); if (*proper_len < (len >> 1)) { log_err(_("One leaf split is not enough. 
The hash " "table will need to be reprocessed.\n")); brelse(lbh); return changes; } lindex += (*proper_len); /* skip the new leaf from the split */ len -= (*proper_len); } if (*proper_len < len) { log_err(_("There are %d pointers, but leaf 0x%llx's " "depth, %d, only allows %d\n"), len, (unsigned long long)leafblk, leaf.lf_depth, *proper_len); } brelse(lbh); /* At this point, lindex should be at the proper end of the pointers. Now we need to replace any extra duplicate pointers to the old (original) leafblk (that ran off the end) with new leaf blocks. */ lindex += (*proper_len); /* Skip past the normal good pointers */ len -= (*proper_len); extras = 0; for (i = 0; i < len; i++) { if (be64_to_cpu(tbl[lindex + i]) == leafblk) extras++; else break; } if (extras) { log_err(_("Found %d extra pointers to leaf %llu (0x%llx)\n"), extras, (unsigned long long)leafblk, (unsigned long long)leafblk); pad_with_leafblks(ip, tbl, lindex, extras); log_err(_("Reprocessing index 0x%x (case 2).\n"), lindex); return 1; } return changes; } /* check_hash_tbl_dups - check for the same leaf in multiple places */ static int check_hash_tbl_dups(struct gfs2_inode *ip, uint64_t *tbl, unsigned hsize, int lindex, int len) { int l, len2; uint64_t leafblk, leaf_no; struct gfs2_buffer_head *lbh; struct gfs2_leaf leaf; struct gfs2_dirent dentry, *de; int hash_index; /* index into the hash table based on the hash */ leafblk = be64_to_cpu(tbl[lindex]); for (l = 0; l < hsize; l++) { if (l == lindex) { /* skip the valid reference */ l += len - 1; continue; } if (be64_to_cpu(tbl[l]) != leafblk) continue; for (len2 = 0; l + len2 < hsize; len2++) { if (l + len2 == lindex) break; if (be64_to_cpu(tbl[l + len2]) != leafblk) break; } log_err(_("Dinode %llu (0x%llx) has duplicate leaf pointers " "to block %llu (0x%llx) at offsets %u (0x%x) " "(for 0x%x) and %u (0x%x) (for 0x%x)\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)leafblk, (unsigned 
long long)leafblk, lindex, lindex, len, l, l, len2); /* See which set of references is valid: the one passed in or the duplicate we found. */ memset(&leaf, 0, sizeof(leaf)); leaf_no = leafblk; if (!valid_block(ip->i_sbd, leaf_no)) /* Checked later */ continue; lbh = bread(ip->i_sbd, leafblk); if (gfs2_check_meta(lbh, GFS2_METATYPE_LF)) { /* Chked later */ brelse(lbh); continue; } memset(&dentry, 0, sizeof(struct gfs2_dirent)); de = (struct gfs2_dirent *)(lbh->b_data + sizeof(struct gfs2_leaf)); gfs2_dirent_in(&dentry, (char *)de); hash_index = hash_table_index(dentry.de_hash, ip); brelse(lbh); /* check the duplicate ref first */ if (hash_index < l || hash_index > l + len2) { log_err(_("This leaf block has hash index %d, which " "is out of bounds for lindex (%d - %d)\n"), hash_index, l, l + len2); if (!query( _("Fix the hash table? (y/n) "))) { log_err(_("Hash table not fixed.\n")); return 0; } /* Adjust the ondisk block count. The original value may have been correct without the duplicates but pass1 would have counted them and adjusted the count to include them. So we must subtract them. */ ip->i_di.di_blocks--; bmodified(ip->i_bh); pad_with_leafblks(ip, tbl, l, len2); } else { log_debug(_("Hash index 0x%x is the proper " "reference to leaf 0x%llx.\n"), l, (unsigned long long)leafblk); } /* Check the original ref: both references might be bad. If both were bad, just return and if we encounter it again, we'll treat it as new. If the original ref is not bad, keep looking for (and fixing) other instances. */ if (hash_index < lindex || hash_index > lindex + len) { log_err(_("This leaf block has hash index %d, which " "is out of bounds for lindex (%d - %d).\n"), hash_index, lindex, lindex + len); if (!query( _("Fix the hash table? 
(y/n) "))) { log_err(_("Hash table not fixed.\n")); return 0; } ip->i_di.di_blocks--; bmodified(ip->i_bh); pad_with_leafblks(ip, tbl, lindex, len); /* At this point we know both copies are bad, so we return to start fresh */ return -EFAULT; } else { log_debug(_("Hash index 0x%x is the proper " "reference to leaf 0x%llx.\n"), lindex, (unsigned long long)leafblk); } } return 0; } /* check_hash_tbl - check that the hash table is sane * * We've got to make sure the hash table is sane. Each leaf needs to * be counted a proper power of 2. We can't just have 3 pointers to a leaf. * The number of pointers must correspond to the proper leaf depth, and they * must all fall on power-of-two boundaries. The leaf block pointers all need * to fall properly on these boundaries, otherwise the kernel code's * calculations will land it on the wrong leaf block while it's searching, * and the result will be files you can see with ls, but can't open, delete * or use them. * * The goal of this function is to check the hash table to make sure the * boundaries and lengths all line up properly, and if not, to fix it. * * Note: There's a delicate balance here, because this function gets called * BEFORE leaf blocks are checked by function check_leaf from function * check_leaf_blks: the hash table has to be sane before we can start * checking all the leaf blocks. And yet if there's hash table corruption * we may need to reference leaf blocks to fix it, which means we need * to check and/or fix a leaf block along the way. 
*/ static int check_hash_tbl(struct gfs2_inode *ip, uint64_t *tbl, unsigned hsize, void *private) { int error = 0; int lindex, len, proper_len, i, changes = 0; uint64_t leafblk; struct gfs2_leaf leaf; struct gfs2_buffer_head *lbh; int factor; uint32_t proper_start; uint32_t next_proper_start; int anomaly; lindex = 0; while (lindex < hsize) { if (fsck_abort) return changes; len = 1; factor = 0; leafblk = be64_to_cpu(tbl[lindex]); next_proper_start = lindex; anomaly = 0; while (lindex + (len << 1) - 1 < hsize) { if (be64_to_cpu(tbl[lindex + (len << 1) - 1]) != leafblk) break; next_proper_start = (lindex & ~((len << 1) - 1)); if (lindex != next_proper_start) anomaly = 1; /* Check if there are other values written between here and the next factor. */ for (i = len; !anomaly && i + lindex < hsize && i < (len << 1); i++) if (be64_to_cpu(tbl[lindex + i]) != leafblk) anomaly = 1; if (anomaly) break; len <<= 1; factor++; } /* Check for leftover pointers after the factor of two: */ proper_len = len; /* A factor of 2 that fits nicely */ while (lindex + len < hsize && be64_to_cpu(tbl[lindex + len]) == leafblk) len++; /* See if that leaf block is valid. If not, write a new one that falls on a proper boundary. If it doesn't naturally, we may need more. */ if (!valid_block(ip->i_sbd, leafblk)) { uint64_t new_leafblk; log_err(_("Dinode %llu (0x%llx) has bad leaf pointers " "at offset %d for %d\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, lindex, len); if (!query( _("Fix the hash table? (y/n) "))) { log_err(_("Hash table not fixed.\n")); lindex += len; continue; } error = write_new_leaf(ip, lindex, proper_len, _("replacing"), &new_leafblk); if (error) return error; for (i = lindex; i < lindex + proper_len; i++) tbl[i] = cpu_to_be64(new_leafblk); lindex += proper_len; continue; } if (check_hash_tbl_dups(ip, tbl, hsize, lindex, len)) continue; /* Make sure they call on proper leaf-split boundaries. 
This is the calculation used by the kernel, and dir_split_leaf */ proper_start = (lindex & ~(proper_len - 1)); if (lindex != proper_start) { log_debug(_("lindex 0x%llx is not a proper starting " "point for leaf %llu (0x%llx): 0x%llx\n"), (unsigned long long)lindex, (unsigned long long)leafblk, (unsigned long long)leafblk, (unsigned long long)proper_start); changes = fix_hashtable(ip, tbl, hsize, leafblk, lindex, proper_start, len, &proper_len, factor); /* Check if we need to split more leaf blocks */ if (changes) { if (proper_len < (len >> 1)) log_err(_("More leaf splits are " "needed; ")); log_err(_("Reprocessing index 0x%x (case 3).\n"), lindex); continue; /* Make it reprocess the lindex */ } } /* Check for extra pointers to this leaf. At this point, len is the number of pointers we have. proper_len is the proper number of pointers if the hash table is assumed correct. Function fix_hashtable will read in the leaf block and determine the "actual" proper length based on the leaf depth, and adjust the hash table accordingly. */ if (len != proper_len) { log_err(_("Length %d (0x%x) is not a proper length " "for leaf %llu (0x%llx). Valid boundary " "assumed to be %d (0x%x).\n"), len, len, (unsigned long long)leafblk, (unsigned long long)leafblk, proper_len, proper_len); lbh = bread(ip->i_sbd, leafblk); gfs2_leaf_in(&leaf, lbh); if (gfs2_check_meta(lbh, GFS2_METATYPE_LF) || leaf.lf_depth > ip->i_di.di_depth) leaf.lf_depth = factor; brelse(lbh); changes = fix_hashtable(ip, tbl, hsize, leafblk, lindex, lindex, len, &proper_len, leaf.lf_depth); /* If fixing the hash table made changes, we can no longer count on the leaf block pointers all pointing to the same leaf (which is checked below). To avoid flagging another error, reprocess the offset. 
*/ if (changes) { log_err(_("Reprocessing index 0x%x (case 4).\n"), lindex); continue; /* Make it reprocess the lindex */ } } /* Now make sure they're all the same pointer */ for (i = lindex; i < lindex + proper_len; i++) { if (fsck_abort) return changes; if (be64_to_cpu(tbl[i]) == leafblk) /* No problem */ continue; log_err(_("Dinode %llu (0x%llx) has a hash table " "inconsistency at index %d (0x%x) for %d\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, i, i, len); if (!query( _("Fix the hash table? (y/n) "))) { log_err(_("Hash table not fixed.\n")); continue; } changes++; /* Now we have to determine if the hash table is corrupt, or if the leaf has the wrong depth. */ lbh = bread(ip->i_sbd, leafblk); gfs2_leaf_in(&leaf, lbh); brelse(lbh); /* Calculate the expected pointer count based on the leaf depth. */ proper_len = 1 << (ip->i_di.di_depth - leaf.lf_depth); if (proper_len != len) { log_debug(_("Length 0x%x is not proper for " "leaf %llu (0x%llx): 0x%x\n"), len, (unsigned long long)leafblk, (unsigned long long)leafblk, proper_len); changes = fix_hashtable(ip, tbl, hsize, leafblk, lindex, lindex, len, &proper_len, leaf.lf_depth); break; } } lindex += proper_len; } if (!error && changes) error = 1; return error; } struct metawalk_fxns pass2_fxns = { .private = NULL, .check_leaf_depth = check_leaf_depth, .check_leaf = NULL, .check_metalist = NULL, .check_data = NULL, .check_eattr_indir = check_eattr_indir, .check_eattr_leaf = check_eattr_leaf, .check_dentry = check_dentry, .check_eattr_entry = NULL, .check_hash_tbl = check_hash_tbl, .repair_leaf = pass2_repair_leaf, }; /* Check system directory inode */ /* Should work for all system directories: root, master, jindex, per_node */ static int check_system_dir(struct gfs2_inode *sysinode, const char *dirname, int builder(struct gfs2_sbd *sdp)) { uint64_t iblock = 0, cur_blks; struct dir_status ds = {0}; char *filename; int filename_len; char tmp_name[256]; int error = 0; 
log_info( _("Checking system directory inode '%s'\n"), dirname); if (!sysinode) { log_err( _("Failed to check '%s': sysinode is null\n"), dirname); stack; return -1; } iblock = sysinode->i_di.di_num.no_addr; ds.q = block_type(iblock); pass2_fxns.private = (void *) &ds; if (ds.q == gfs2_bad_block) { cur_blks = sysinode->i_di.di_blocks; /* First check that the directory's metatree is valid */ error = check_metatree(sysinode, &pass2_fxns); if (error < 0) { stack; return error; } if (sysinode->i_di.di_blocks != cur_blks) reprocess_inode(sysinode, _("System inode")); } error = check_dir(sysinode->i_sbd, iblock, &pass2_fxns); if (skip_this_pass || fsck_abort) /* if asked to skip the rest */ return FSCK_OK; if (error < 0) { stack; return -1; } if (error > 0) fsck_blockmap_set(sysinode, iblock, dirname, gfs2_inode_invalid); if (check_inode_eattr(sysinode, &pass2_fxns)) { stack; return -1; } if (!ds.dotdir) { log_err( _("No '.' entry found for %s directory.\n"), dirname); if (query( _("Is it okay to add '.' entry? (y/n) "))) { cur_blks = sysinode->i_di.di_blocks; sprintf(tmp_name, "."); filename_len = strlen(tmp_name); /* no trailing NULL */ if (!(filename = malloc(sizeof(char) * filename_len))) { log_err( _("Unable to allocate name string\n")); stack; return -1; } if (!(memset(filename, 0, sizeof(char) * filename_len))) { log_err( _("Unable to zero name string\n")); stack; free(filename); return -1; } memcpy(filename, tmp_name, filename_len); log_warn( _("Adding '.' entry\n")); error = dir_add(sysinode, filename, filename_len, &(sysinode->i_di.di_num), (sysinode->i_sbd->gfs1 ? GFS_FILE_DIR : DT_DIR)); if (error) { log_err(_("Error adding directory %s: %s\n"), filename, strerror(errno)); free(filename); return -errno; } if (cur_blks != sysinode->i_di.di_blocks) reprocess_inode(sysinode, dirname); /* This system inode is linked to itself via '.' 
*/ incr_link_count(sysinode->i_di.di_num, sysinode, "sysinode \".\""); ds.entry_count++; free(filename); } else log_err( _("The directory was not fixed.\n")); } if (sysinode->i_di.di_entries != ds.entry_count) { log_err( _("%s inode %llu (0x%llx" "): Entries is %d - should be %d\n"), dirname, (unsigned long long)sysinode->i_di.di_num.no_addr, (unsigned long long)sysinode->i_di.di_num.no_addr, sysinode->i_di.di_entries, ds.entry_count); if (query( _("Fix entries for %s inode %llu (0x%llx)? (y/n) "), dirname, (unsigned long long)sysinode->i_di.di_num.no_addr, (unsigned long long)sysinode->i_di.di_num.no_addr)) { sysinode->i_di.di_entries = ds.entry_count; bmodified(sysinode->i_bh); log_warn( _("Entries updated\n")); } else { log_err( _("Entries for inode %llu (0x%llx" ") left out of sync\n"), (unsigned long long) sysinode->i_di.di_num.no_addr, (unsigned long long) sysinode->i_di.di_num.no_addr); } } return 0; } /** * is_system_dir - determine if a given block is for a system directory. */ static inline int is_system_dir(struct gfs2_sbd *sdp, uint64_t block) { if (block == sdp->md.rooti->i_di.di_num.no_addr) return TRUE; if (sdp->gfs1) return FALSE; if (block == sdp->md.jiinode->i_di.di_num.no_addr || block == sdp->md.pinode->i_di.di_num.no_addr || block == sdp->master_dir->i_di.di_num.no_addr) return TRUE; return FALSE; } /* What i need to do in this pass is check that the dentries aren't * pointing to invalid blocks...and verify the contents of each * directory. and start filling in the directory info structure*/ /** * pass2 - check pathnames * * verify root inode * directory name length * entries in range */ int pass2(struct gfs2_sbd *sdp) { uint64_t dirblk, cur_blks; uint8_t q; struct dir_status ds = {0}; struct gfs2_inode *ip; char *filename; int filename_len; char tmp_name[256]; int error = 0; /* Check all the system directory inodes. 
*/ if (!sdp->gfs1 && check_system_dir(sdp->md.jiinode, "jindex", build_jindex)) { stack; return FSCK_ERROR; } if (skip_this_pass || fsck_abort) /* if asked to skip the rest */ return FSCK_OK; if (!sdp->gfs1 && check_system_dir(sdp->md.pinode, "per_node", build_per_node)) { stack; return FSCK_ERROR; } if (skip_this_pass || fsck_abort) /* if asked to skip the rest */ return FSCK_OK; if (!sdp->gfs1 && check_system_dir(sdp->master_dir, "master", build_master)) { stack; return FSCK_ERROR; } if (skip_this_pass || fsck_abort) /* if asked to skip the rest */ return FSCK_OK; if (check_system_dir(sdp->md.rooti, "root", build_root)) { stack; return FSCK_ERROR; } if (skip_this_pass || fsck_abort) /* if asked to skip the rest */ return FSCK_OK; log_info( _("Checking directory inodes.\n")); /* Grab each directory inode, and run checks on it */ for (dirblk = 0; dirblk < last_fs_block; dirblk++) { warm_fuzzy_stuff(dirblk); if (skip_this_pass || fsck_abort) /* if asked to skip the rest */ return FSCK_OK; /* Skip the system inodes - they're checked above */ if (is_system_dir(sdp, dirblk)) continue; q = block_type(dirblk); if (q != gfs2_inode_dir) continue; /* If we created lost+found, its links should have been properly adjusted, so don't check it. 
*/ if (lf_was_created && (dirblk == lf_dip->i_di.di_num.no_addr)) { log_debug(_("Pass2 skipping the new lost+found.\n")); continue; } log_debug( _("Checking directory inode at block %llu (0x%llx)\n"), (unsigned long long)dirblk, (unsigned long long)dirblk); memset(&ds, 0, sizeof(ds)); pass2_fxns.private = (void *) &ds; if (ds.q == gfs2_bad_block) { /* First check that the directory's metatree * is valid */ ip = fsck_load_inode(sdp, dirblk); cur_blks = ip->i_di.di_blocks; error = check_metatree(ip, &pass2_fxns); fsck_inode_put(&ip); if (error < 0) { stack; return error; } if (ip->i_di.di_blocks != cur_blks) reprocess_inode(ip, "current"); } error = check_dir(sdp, dirblk, &pass2_fxns); if (skip_this_pass || fsck_abort) /* if asked to skip the rest */ return FSCK_OK; if (error < 0) { stack; return FSCK_ERROR; } if (error > 0) { struct dir_info *di; di = dirtree_find(dirblk); if (!di) { stack; return FSCK_ERROR; } if (query( _("Remove directory entry for bad" " inode %llu (0x%llx) in %llu" " (0x%llx)? (y/n)"), (unsigned long long)dirblk, (unsigned long long)dirblk, (unsigned long long)di->treewalk_parent, (unsigned long long)di->treewalk_parent)) { error = remove_dentry_from_dir(sdp, di->treewalk_parent, dirblk); if (error < 0) { stack; return FSCK_ERROR; } if (error > 0) { log_warn( _("Unable to find dentry for %llu" " (0x%llx) in %llu" " (0x%llx)\n"), (unsigned long long)dirblk, (unsigned long long)dirblk, (unsigned long long)di->treewalk_parent, (unsigned long long)di->treewalk_parent); } log_warn( _("Directory entry removed\n")); } else log_err( _("Directory entry to invalid inode remains.\n")); log_debug( _("Directory block %lld (0x%llx) " "is now marked as 'invalid'\n"), (unsigned long long)dirblk, (unsigned long long)dirblk); /* Can't use fsck_blockmap_set here because we don't have an inode in memory. 
*/ gfs2_blockmap_set(bl, dirblk, gfs2_inode_invalid); check_n_fix_bitmap(sdp, dirblk, 0, gfs2_inode_invalid); } ip = fsck_load_inode(sdp, dirblk); if (!ds.dotdir) { log_err(_("No '.' entry found for directory inode at " "block %llu (0x%llx)\n"), (unsigned long long)dirblk, (unsigned long long)dirblk); if (query( _("Is it okay to add '.' entry? (y/n) "))) { sprintf(tmp_name, "."); filename_len = strlen(tmp_name); /* no trailing NULL */ if (!(filename = malloc(sizeof(char) * filename_len))) { log_err(_("Unable to allocate name\n")); stack; return FSCK_ERROR; } if (!memset(filename, 0, sizeof(char) * filename_len)) { log_err( _("Unable to zero name\n")); stack; return FSCK_ERROR; } memcpy(filename, tmp_name, filename_len); cur_blks = ip->i_di.di_blocks; error = dir_add(ip, filename, filename_len, &(ip->i_di.di_num), (sdp->gfs1 ? GFS_FILE_DIR : DT_DIR)); if (error) { log_err(_("Error adding directory %s: %s\n"), filename, strerror(errno)); return -errno; } if (cur_blks != ip->i_di.di_blocks) { char dirname[80]; sprintf(dirname, _("Directory at %lld " "(0x%llx)"), (unsigned long long)dirblk, (unsigned long long)dirblk); reprocess_inode(ip, dirname); } /* directory links to itself via '.' */ incr_link_count(ip->i_di.di_num, ip, _("\". (itself)\"")); ds.entry_count++; free(filename); log_err( _("The directory was fixed.\n")); } else { log_err( _("The directory was not fixed.\n")); } } if (!fsck_abort && ip->i_di.di_entries != ds.entry_count) { log_err( _("Entries is %d - should be %d for inode " "block %llu (0x%llx)\n"), ip->i_di.di_entries, ds.entry_count, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); if (query( _("Fix the entry count? 
(y/n) "))) { ip->i_di.di_entries = ds.entry_count; bmodified(ip->i_bh); } else { log_err( _("The entry count was not fixed.\n")); } } fsck_inode_put(&ip); /* does a gfs2_dinode_out, brelse */ } return FSCK_OK; } gfs2-utils/gfs2/fsck/pass3.c0000664000175000017500000002430212154127655014524 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #define _(String) gettext(String) #include "libgfs2.h" #include "osi_list.h" #include "fsck.h" #include "lost_n_found.h" #include "link.h" #include "metawalk.h" #include "util.h" static int attach_dotdot_to(struct gfs2_sbd *sdp, uint64_t newdotdot, uint64_t olddotdot, uint64_t block) { char *filename; int filename_len, err; struct gfs2_inode *ip, *pip; uint64_t cur_blks; ip = fsck_load_inode(sdp, block); pip = fsck_load_inode(sdp, newdotdot); /* FIXME: Need to add some interactive * options here and come up with a * good default for non-interactive */ /* FIXME: do i need to correct the * '..' entry for this directory in * this case? */ filename_len = strlen(".."); if (!(filename = malloc((sizeof(char) * filename_len) + 1))) { log_err( _("Unable to allocate name\n")); fsck_inode_put(&ip); fsck_inode_put(&pip); stack; return -1; } if (!memset(filename, 0, (sizeof(char) * filename_len) + 1)) { log_err( _("Unable to zero name\n")); fsck_inode_put(&ip); fsck_inode_put(&pip); stack; return -1; } memcpy(filename, "..", filename_len); if (gfs2_dirent_del(ip, filename, filename_len)) log_warn( _("Unable to remove \"..\" directory entry.\n")); else decr_link_count(olddotdot, block, _("old \"..\"")); cur_blks = ip->i_di.di_blocks; err = dir_add(ip, filename, filename_len, &pip->i_di.di_num, (sdp->gfs1 ? 
GFS_FILE_DIR : DT_DIR)); if (err) { log_err(_("Error adding directory %s: %s\n"), filename, strerror(errno)); exit(FSCK_ERROR); } if (cur_blks != ip->i_di.di_blocks) { char dirname[80]; sprintf(dirname, _("Directory at %lld (0x%llx)"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); reprocess_inode(ip, dirname); } incr_link_count(pip->i_di.di_num, ip, _("new \"..\"")); fsck_inode_put(&ip); fsck_inode_put(&pip); free(filename); return 0; } static struct dir_info *mark_and_return_parent(struct gfs2_sbd *sdp, struct dir_info *di) { struct dir_info *pdi; uint8_t q_dotdot, q_treewalk; int error = 0; di->checked = 1; if (!di->treewalk_parent) return NULL; if (di->dotdot_parent.no_addr == di->treewalk_parent) { q_dotdot = block_type(di->dotdot_parent.no_addr); if (q_dotdot != gfs2_inode_dir) { log_err( _("Orphaned directory at block %llu (0x%llx) " "moved to lost+found\n"), (unsigned long long)di->dinode.no_addr, (unsigned long long)di->dinode.no_addr); return NULL; } goto out; } log_warn( _("Directory '..' and treewalk connections disagree for " "inode %llu (0x%llx)\n"), (unsigned long long)di->dinode.no_addr, (unsigned long long)di->dinode.no_addr); log_notice( _("'..' has %llu (0x%llx), treewalk has %llu (0x%llx)\n"), (unsigned long long)di->dotdot_parent.no_addr, (unsigned long long)di->dotdot_parent.no_addr, (unsigned long long)di->treewalk_parent, (unsigned long long)di->treewalk_parent); q_dotdot = block_type(di->dotdot_parent.no_addr); q_treewalk = block_type(di->treewalk_parent); /* if the dotdot entry isn't a directory, but the * treewalk is, treewalk is correct - if the treewalk * entry isn't a directory, but the dotdot is, dotdot * is correct - if both are directories, which do we * choose? 
if neither are directories, we have a * problem - need to move this directory into lost+found */ if (q_dotdot != gfs2_inode_dir) { if (q_treewalk != gfs2_inode_dir) { log_err( _("Orphaned directory, move to " "lost+found\n")); return NULL; } else { log_warn( _("Treewalk parent is correct, fixing " "dotdot -> %llu (0x%llx)\n"), (unsigned long long)di->treewalk_parent, (unsigned long long)di->treewalk_parent); attach_dotdot_to(sdp, di->treewalk_parent, di->dotdot_parent.no_addr, di->dinode.no_addr); di->dotdot_parent.no_addr = di->treewalk_parent; } goto out; } if (q_treewalk == gfs2_inode_dir) { log_err( _("Both .. and treewalk parents are directories, " "going with treewalk...\n")); attach_dotdot_to(sdp, di->treewalk_parent, di->dotdot_parent.no_addr, di->dinode.no_addr); di->dotdot_parent.no_addr = di->treewalk_parent; goto out; } log_warn( _(".. parent is valid, but treewalk is bad - reattaching to " "lost+found")); /* FIXME: add a dinode for this entry instead? */ if (!query( _("Remove directory entry for bad inode %llu (0x%llx) in " "%llu (0x%llx)? 
/**
 * pass3 - check connectivity of directories
 * @sdp: the superblock descriptor
 *
 * handle disconnected directories
 * handle lost+found directory errors (missing, not a directory, no space)
 *
 * First marks the well-known system directories as connected, then walks
 * every entry in the directory tree upwards through its parents until an
 * already-checked directory is reached.  A directory whose chain ends
 * without a parent is either cleared or offered for lost+found.
 *
 * Returns: FSCK_OK normally (including when the pass is skipped or
 * aborted), FSCK_ERROR if relinking into lost+found fails.
 */
int pass3(struct gfs2_sbd *sdp)
{
	struct osi_node *tmp, *next = NULL;
	struct dir_info *di, *tdi;
	struct gfs2_inode *ip;
	uint8_t q;

	/* The root directory is connected by definition */
	di = dirtree_find(sdp->md.rooti->i_di.di_num.no_addr);
	if (di) {
		log_info( _("Marking root inode connected\n"));
		di->checked = 1;
	}
	if (sdp->gfs1) {
		/* GFS1: the statfs, jindex, rindex and quota inodes are
		 * pre-marked as connected if they are in the tree */
		di = dirtree_find(sdp->md.statfs->i_di.di_num.no_addr);
		if (di) {
			log_info( _("Marking GFS1 statfs file inode "
				    "connected\n"));
			di->checked = 1;
		}
		di = dirtree_find(sdp->md.jiinode->i_di.di_num.no_addr);
		if (di) {
			log_info( _("Marking GFS1 jindex file inode "
				    "connected\n"));
			di->checked = 1;
		}
		di = dirtree_find(sdp->md.riinode->i_di.di_num.no_addr);
		if (di) {
			log_info( _("Marking GFS1 rindex file inode "
				    "connected\n"));
			di->checked = 1;
		}
		di = dirtree_find(sdp->md.qinode->i_di.di_num.no_addr);
		if (di) {
			log_info( _("Marking GFS1 quota file inode "
				    "connected\n"));
			di->checked = 1;
		}
	} else {
		/* GFS2: only the master directory needs pre-marking */
		di = dirtree_find(sdp->master_dir->i_di.di_num.no_addr);
		if (di) {
			log_info( _("Marking master directory inode "
				    "connected\n"));
			di->checked = 1;
		}
	}

	/* Go through the directory list, working up through the parents
	 * until we find one that's been checked already.  If we don't
	 * find a parent, put in lost+found.
	 */
	log_info( _("Checking directory linkage.\n"));
	for (tmp = osi_first(&dirtree); tmp; tmp = next) {
		/* Fetch the successor first; the body may not return to
		 * this node in a usable state. */
		next = osi_next(tmp);
		di = (struct dir_info *)tmp;
		while (!di->checked) {
			/* FIXME: Change this so it returns success or
			 * failure and put the parent inode in a
			 * param */
			if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
				return FSCK_OK;
			tdi = mark_and_return_parent(sdp, di);

			if (tdi) {
				log_debug( _("Directory at block %llu "
					     "(0x%llx) connected\n"),
					   (unsigned long long)di->dinode.no_addr,
					   (unsigned long long)di->dinode.no_addr);
				/* Climb one level: from here on 'di' is the
				 * parent, and the handling below applies to
				 * whichever ancestor the climb stops at. */
				di = tdi;
				continue;
			}
			/* No parent was returned: treat 'di' as unlinked */
			q = block_type(di->dinode.no_addr);
			if (q == gfs2_bad_block) {
				log_err( _("Found unlinked directory "
					   "containing bad block\n"));
				if (query(_("Clear unlinked directory "
					   "with bad blocks? (y/n) "))) {
					log_warn( _("inode %lld (0x%llx) is "
						    "now marked as free\n"),
						  (unsigned long long)
						  di->dinode.no_addr,
						  (unsigned long long)
						  di->dinode.no_addr);
					/* Can't use fsck_blockmap_set
					   because we don't have ip */
					gfs2_blockmap_set(bl,
							  di->dinode.no_addr,
							  gfs2_block_free);
					check_n_fix_bitmap(sdp,
							   di->dinode.no_addr,
							   0, gfs2_block_free);
					break;
				} else
					log_err( _("Unlinked directory with bad block remains\n"));
				/* NOTE(review): when the user declines, q is
				 * still gfs2_bad_block, so the "not an inode"
				 * test below also fires and asks again -
				 * confirm this double prompt is intended. */
			}
			if (q != gfs2_inode_dir && q != gfs2_inode_file &&
			   q != gfs2_inode_lnk && q != gfs2_inode_device &&
			   q != gfs2_inode_fifo && q != gfs2_inode_sock) {
				log_err( _("Unlinked block marked as an inode "
					   "is not an inode\n"));
				if (!query(_("Clear the unlinked block?"
					    " (y/n) "))) {
					log_err( _("The block was not "
						   "cleared\n"));
					break;
				}
				log_warn( _("inode %lld (0x%llx) is now "
					    "marked as free\n"),
					  (unsigned long long)di->dinode.no_addr,
					  (unsigned long long)di->dinode.no_addr);
				/* Can't use fsck_blockmap_set
				   because we don't have ip */
				gfs2_blockmap_set(bl, di->dinode.no_addr,
						  gfs2_block_free);
				check_n_fix_bitmap(sdp, di->dinode.no_addr, 0,
						   gfs2_block_free);
				log_err( _("The block was cleared\n"));
				break;
			}

			log_err( _("Found unlinked directory at block %llu"
				   " (0x%llx)\n"),
				 (unsigned long long)di->dinode.no_addr,
				 (unsigned long long)di->dinode.no_addr);
			ip = fsck_load_inode(sdp, di->dinode.no_addr);
			/* Don't skip zero size directories with eattrs */
			if (!ip->i_di.di_size && !ip->i_di.di_eattr){
				log_err( _("Unlinked directory has zero "
					   "size.\n"));
				if (query( _("Remove zero-size unlinked "
					    "directory? (y/n) "))) {
					fsck_blockmap_set(ip,
							  di->dinode.no_addr,
						_("zero-sized unlinked inode"),
							  gfs2_block_free);
					fsck_inode_put(&ip);
					break;
				} else {
					log_err( _("Zero-size unlinked "
						   "directory remains\n"));
				}
			}
			if (query( _("Add unlinked directory to "
				    "lost+found? (y/n) "))) {
				if (add_inode_to_lf(ip)) {
					/* Release the inode before bailing */
					fsck_inode_put(&ip);
					stack;
					return FSCK_ERROR;
				}
				log_warn( _("Directory relinked to lost+found\n"));
			} else {
				log_err( _("Unlinked directory remains unlinked\n"));
			}
			fsck_inode_put(&ip);
			break;
		}
	}
	if (lf_dip)
		log_debug( _("At end of pass3, lost+found entries is %u\n"),
				  lf_dip->i_di.di_entries);
	return FSCK_OK;
}
(y/n) "))) { if (add_inode_to_lf(ip)) { fsck_inode_put(&ip); stack; return FSCK_ERROR; } log_warn( _("Directory relinked to lost+found\n")); } else { log_err( _("Unlinked directory remains unlinked\n")); } fsck_inode_put(&ip); break; } } if (lf_dip) log_debug( _("At end of pass3, lost+found entries is %u\n"), lf_dip->i_di.di_entries); return FSCK_OK; } gfs2-utils/gfs2/fsck/pass4.c0000664000175000017500000001535712110647577014541 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #define _(String) gettext(String) #include "libgfs2.h" #include "fsck.h" #include "lost_n_found.h" #include "inode_hash.h" #include "metawalk.h" #include "util.h" struct metawalk_fxns pass4_fxns_delete = { .private = NULL, .check_metalist = delete_metadata, .check_data = delete_data, .check_eattr_indir = delete_eattr_indir, .check_eattr_leaf = delete_eattr_leaf, }; /* Updates the link count of an inode to what the fsck has seen for * link count */ static int fix_link_count(struct inode_info *ii, struct gfs2_inode *ip) { log_info( _("Fixing inode link count (%d->%d) for %llu (0x%llx) \n"), ip->i_di.di_nlink, ii->counted_links, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); if (ip->i_di.di_nlink == ii->counted_links) return 0; ip->i_di.di_nlink = ii->counted_links; bmodified(ip->i_bh); log_debug( _("Changing inode %llu (0x%llx) to have %u links\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, ii->counted_links); return 0; } static int scan_inode_list(struct gfs2_sbd *sdp) { struct osi_node *tmp, *next = NULL; struct inode_info *ii; struct gfs2_inode *ip; int lf_addition = 0; uint8_t q; /* FIXME: should probably factor this out into a generic * scanning fxn */ for (tmp = osi_first(&inodetree); tmp; tmp = next) { if (skip_this_pass || fsck_abort) /* if asked to skip the rest */ return 0; next = osi_next(tmp); if (!(ii = (struct inode_info *)tmp)) { log_crit( _("osi_tree 
broken in scan_info_list!!\n")); exit(FSCK_ERROR); } /* Don't check reference counts on the special gfs files */ if (sdp->gfs1 && ((ii->di_num.no_addr == sdp->md.riinode->i_di.di_num.no_addr) || (ii->di_num.no_addr == sdp->md.jiinode->i_di.di_num.no_addr) || (ii->di_num.no_addr == sdp->md.qinode->i_di.di_num.no_addr) || (ii->di_num.no_addr == sdp->md.statfs->i_di.di_num.no_addr))) continue; if (ii->counted_links == 0) { log_err( _("Found unlinked inode at %llu (0x%llx)\n"), (unsigned long long)ii->di_num.no_addr, (unsigned long long)ii->di_num.no_addr); q = block_type(ii->di_num.no_addr); if (q == gfs2_bad_block) { log_err( _("Unlinked inode %llu (0x%llx) contains " "bad blocks\n"), (unsigned long long)ii->di_num.no_addr, (unsigned long long)ii->di_num.no_addr); if (query( _("Delete unlinked inode with bad " "blocks? (y/n) "))) { ip = fsck_load_inode(sdp, ii->di_num.no_addr); check_inode_eattr(ip, &pass4_fxns_delete); check_metatree(ip, &pass4_fxns_delete); fsck_blockmap_set(ip, ii->di_num.no_addr, _("bad unlinked"), gfs2_block_free); fsck_inode_put(&ip); continue; } else log_err( _("Unlinked inode with bad blocks not cleared\n")); } if (q != gfs2_inode_dir && q != gfs2_inode_file && q != gfs2_inode_lnk && q != gfs2_inode_device && q != gfs2_inode_fifo && q != gfs2_inode_sock) { log_err( _("Unlinked block %lld (0x%llx) " "marked as inode is " "not an inode (%d)\n"), (unsigned long long)ii->di_num.no_addr, (unsigned long long)ii->di_num.no_addr, q); ip = fsck_load_inode(sdp, ii->di_num.no_addr); if (query(_("Delete unlinked inode? 
(y/n) "))) { check_inode_eattr(ip, &pass4_fxns_delete); check_metatree(ip, &pass4_fxns_delete); fsck_blockmap_set(ip, ii->di_num.no_addr, _("invalid unlinked"), gfs2_block_free); fsck_inode_put(&ip); log_err( _("The inode was deleted\n")); } else { log_err( _("The inode was not " "deleted\n")); fsck_inode_put(&ip); } continue; } ip = fsck_load_inode(sdp, ii->di_num.no_addr); /* We don't want to clear zero-size files with * eattrs - there might be relevent info in * them. */ if (!ip->i_di.di_size && !ip->i_di.di_eattr){ log_err( _("Unlinked inode has zero size\n")); if (query(_("Clear zero-size unlinked inode? " "(y/n) "))) { fsck_blockmap_set(ip, ii->di_num.no_addr, _("unlinked zero-length"), gfs2_block_free); fsck_inode_put(&ip); continue; } } if (query( _("Add unlinked inode to lost+found? " "(y/n)"))) { if (add_inode_to_lf(ip)) { stack; fsck_inode_put(&ip); return -1; } else { fix_link_count(ii, ip); lf_addition = 1; } } else log_err( _("Unlinked inode left unlinked\n")); fsck_inode_put(&ip); } /* if (ii->counted_links == 0) */ else if (ii->di_nlink != ii->counted_links) { log_err( _("Link count inconsistent for inode %llu" " (0x%llx) has %u but fsck found %u.\n"), (unsigned long long)ii->di_num.no_addr, (unsigned long long)ii->di_num.no_addr, ii->di_nlink, ii->counted_links); /* Read in the inode, adjust the link count, * and write it back out */ if (query( _("Update link count for inode %llu" " (0x%llx) ? 
(y/n) "), (unsigned long long)ii->di_num.no_addr, (unsigned long long)ii->di_num.no_addr)) { ip = fsck_load_inode(sdp, ii->di_num.no_addr); /* bread, inode_get */ fix_link_count(ii, ip); ii->di_nlink = ii->counted_links; fsck_inode_put(&ip); /* out, brelse, free */ log_warn( _("Link count updated to %d for " "inode %llu (0x%llx)\n"), ii->di_nlink, (unsigned long long)ii->di_num.no_addr, (unsigned long long)ii->di_num.no_addr); } else { log_err( _("Link count for inode %llu (0x%llx" ") still incorrect\n"), (unsigned long long)ii->di_num.no_addr, (unsigned long long)ii->di_num.no_addr); } } log_debug( _("block %llu (0x%llx) has link count %d\n"), (unsigned long long)ii->di_num.no_addr, (unsigned long long)ii->di_num.no_addr, ii->di_nlink); } /* osi_list_foreach(tmp, list) */ if (lf_addition) { if (!(ii = inodetree_find(lf_dip->i_di.di_num.no_addr))) { log_crit( _("Unable to find lost+found inode in inode_hash!!\n")); return -1; } else { fix_link_count(ii, lf_dip); } } return 0; } /** * pass4 - Check reference counts (pass 2 & 6 in current fsck) * * handle unreferenced files * lost+found errors (missing, not a directory, no space) * adjust link count * handle unreferenced inodes of other types * handle bad blocks */ int pass4(struct gfs2_sbd *sdp) { if (lf_dip) log_debug( _("At beginning of pass4, lost+found entries is %u\n"), lf_dip->i_di.di_entries); log_info( _("Checking inode reference counts.\n")); if (scan_inode_list(sdp)) { stack; return FSCK_ERROR; } if (lf_dip) log_debug( _("At end of pass4, lost+found entries is %u\n"), lf_dip->i_di.di_entries); return FSCK_OK; } gfs2-utils/gfs2/fsck/pass5.c0000664000175000017500000002022512111707433014515 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #define _(String) gettext(String) #include "libgfs2.h" #include "fsck.h" #include "util.h" static int gfs1_convert_mark(uint8_t q, uint32_t *count) { switch(q) { case gfs2_meta_inval: case gfs2_inode_invalid: /* Convert invalid 
metadata to free blocks */ case gfs2_block_free: count[0]++; return GFS2_BLKST_FREE; case gfs2_block_used: count[2]++; return GFS2_BLKST_USED; case gfs2_inode_dir: case gfs2_inode_file: case gfs2_inode_lnk: case gfs2_inode_device: case gfs2_inode_fifo: case gfs2_inode_sock: count[1]++; return GFS2_BLKST_DINODE; case gfs2_indir_blk: case gfs2_leaf_blk: /*case gfs2_meta_rgrp:*/ case gfs2_jdata: /* gfs1 jdata blocks count as "metadata" and gfs1 metadata is marked the same as gfs2 inode in the bitmap. */ case gfs2_meta_eattr: count[3]++; return GFS2_BLKST_DINODE; case gfs2_freemeta: count[4]++; return GFS2_BLKST_UNLINKED; default: log_err( _("Invalid block type %d found\n"), q); } return -1; } static int gfs2_convert_mark(uint8_t q, uint32_t *count) { switch(q) { case gfs2_meta_inval: case gfs2_inode_invalid: /* Convert invalid metadata to free blocks */ case gfs2_block_free: count[0]++; return GFS2_BLKST_FREE; case gfs2_block_used: count[2]++; return GFS2_BLKST_USED; case gfs2_inode_dir: case gfs2_inode_file: case gfs2_inode_lnk: case gfs2_inode_device: case gfs2_jdata: /* gfs1 jdata blocks count as "metadata" and gfs1 metadata is marked the same as gfs2 inode in the bitmap. 
/**
 * check_block_status - compare one bitmap buffer against the fsck blockmap
 * @sdp: the superblock descriptor
 * @buffer: start of the on-disk bitmap bytes to examine
 * @buflen: number of bitmap bytes in @buffer
 * @rg_block: running block index within the resource group; advanced by
 *            one for every bitmap entry processed
 * @rg_data: added to *@rg_block to form the absolute block number
 * @count: per-rgrp tallies updated through the *_convert_mark() helpers
 *
 * Each bitmap entry is GFS2_BIT_SIZE bits wide.  For every entry the
 * state stored on disk is compared with the state fsck derived; on a
 * mismatch the user is asked whether the bitmap should be corrected.
 *
 * Returns: 0 on success or when skipped/aborted, a negative value if a
 * block has a type that cannot be converted to a bitmap state.
 */
static int check_block_status(struct gfs2_sbd *sdp, char *buffer,
			      unsigned int buflen, uint64_t *rg_block,
			      uint64_t rg_data, uint32_t *count)
{
	unsigned char *byte, *end;
	unsigned int bit;
	unsigned char rg_status;
	int block_status;
	uint8_t q;
	uint64_t block;

	/* FIXME verify cast */
	byte = (unsigned char *) buffer;
	bit = 0;
	end = (unsigned char *) buffer + buflen;

	while (byte < end) {
		/* Extract the on-disk state of the current entry */
		rg_status = ((*byte >> bit) & GFS2_BIT_MASK);
		block = rg_data + *rg_block;
		warm_fuzzy_stuff(block);
		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
			return 0;

		q = block_type(block);

		/* Translate what fsck saw into the expected bitmap state;
		 * this also updates the tallies in count[] */
		if (sdp->gfs1)
			block_status = gfs1_convert_mark(q, count);
		else
			block_status = gfs2_convert_mark(q, count);

		if (block_status < 0) {
			log_err( _("Invalid status for block %llu (0x%llx).\n"),
				 (unsigned long long)block,
				 (unsigned long long)block);
			return block_status;
		}
		/* If one node opens a file and another node deletes it, we
		   may be left with a block that appears to be "unlinked" in
		   the bitmap, but nothing links to it. This is a valid case
		   and should be cleaned up by the file system eventually.
		   It is reported separately from an ordinary mismatch and
		   the user is asked whether to reclaim the block. */
		if (rg_status == GFS2_BLKST_UNLINKED &&
		    block_status == GFS2_BLKST_FREE) {
			log_err( _("Unlinked inode found at block %llu "
				   "(0x%llx).\n"),
				 (unsigned long long)block,
				 (unsigned long long)block);
			if (query(_("Do you want to reclaim the block? "
				   "(y/n) "))) {
				if (gfs2_set_bitmap(sdp, block, block_status))
					log_err(_("Unlinked block %llu "
						  "(0x%llx) bitmap not fixed."
						  "\n"),
						(unsigned long long)block,
						(unsigned long long)block);
				else
					log_err(_("Unlinked block %llu "
						  "(0x%llx) bitmap fixed.\n"),
						(unsigned long long)block,
						(unsigned long long)block);
			} else {
				log_info( _("Unlinked block found at block %llu"
					    " (0x%llx), left unchanged.\n"),
					(unsigned long long)block,
					(unsigned long long)block);
			}
		} else if (rg_status != block_status) {
			/* Names indexed by bitmap state.
			 * NOTE(review): assumes the GFS2_BLKST_* values are
			 * 0..3 in this order - TODO confirm. */
			const char *blockstatus[] = {"Free", "Data",
						     "Unlinked", "inode"};

			log_err( _("Block %llu (0x%llx) bitmap says %u (%s) "
				   "but FSCK saw %u (%s)\n"),
				 (unsigned long long)block,
				 (unsigned long long)block, rg_status,
				 blockstatus[rg_status], block_status,
				 blockstatus[block_status]);
			if (q) /* Don't print redundant "free" */
				log_err( _("Metadata type is %u (%s)\n"), q,
					block_type_string(q));

			if (query(_("Fix bitmap for block %llu (0x%llx) ? (y/n) "),
				 (unsigned long long)block,
				 (unsigned long long)block)) {
				if (gfs2_set_bitmap(sdp, block, block_status))
					log_err( _("Repair failed.\n"));
				else
					log_err( _("Fixed.\n"));
			} else
				log_err( _("Bitmap at block %llu (0x%llx) left inconsistent\n"),
					(unsigned long long)block,
					(unsigned long long)block);
		}
		/* Advance to the next entry, moving to the next byte once
		 * all of the current byte's bits have been consumed */
		(*rg_block)++;
		bit += GFS2_BIT_SIZE;
		if (bit >= 8){
			bit = 0;
			byte++;
		}
	}

	return 0;
}
= count[0]; update = 1; } if (rgp->rg.rg_dinodes != count[1]) { log_err( _("RG #%llu (0x%llx) Inode count inconsistent: is " "%u should be %u\n"), (unsigned long long)rgp->ri.ri_addr, (unsigned long long)rgp->ri.ri_addr, rgp->rg.rg_dinodes, count[1]); rgp->rg.rg_dinodes = count[1]; update = 1; } if (sdp->gfs1 && gfs1rg->rg_usedmeta != count[3]) { log_err( _("RG #%llu (0x%llx) Used metadata count " "inconsistent: is %u should be %u\n"), (unsigned long long)rgp->ri.ri_addr, (unsigned long long)rgp->ri.ri_addr, gfs1rg->rg_usedmeta, count[3]); gfs1rg->rg_usedmeta = count[3]; update = 1; } if (sdp->gfs1 && gfs1rg->rg_freemeta != count[4]) { log_err( _("RG #%llu (0x%llx) Free metadata count " "inconsistent: is %u should be %u\n"), (unsigned long long)rgp->ri.ri_addr, (unsigned long long)rgp->ri.ri_addr, gfs1rg->rg_freemeta, count[4]); gfs1rg->rg_freemeta = count[4]; update = 1; } if (!sdp->gfs1 && (rgp->ri.ri_data - count[0] - count[1]) != count[2]) { /* FIXME not sure how to handle this case ATM - it * means that the total number of blocks we've counted * exceeds the blocks in the rg */ log_err( _("Internal fsck error - AAHHH!\n")); exit(FSCK_ERROR); } if (update) { if (query( _("Update resource group counts? 
(y/n) "))) { log_warn( _("Resource group counts updated\n")); /* write out the rgrp */ if (sdp->gfs1) gfs_rgrp_out(gfs1rg, rgp->bh[0]); else gfs2_rgrp_out_bh(&rgp->rg, rgp->bh[0]); } else log_err( _("Resource group counts left inconsistent\n")); } } /** * pass5 - check resource groups * * fix free block maps * fix used inode maps */ int pass5(struct gfs2_sbd *sdp) { struct osi_node *n, *next = NULL; struct rgrp_tree *rgp = NULL; uint32_t count[5]; uint64_t rg_count = 0; /* Reconcile RG bitmaps with fsck bitmap */ for (n = osi_first(&sdp->rgtree); n; n = next) { next = osi_next(n); if (skip_this_pass || fsck_abort) /* if asked to skip the rest */ return FSCK_OK; log_info( _("Verifying Resource Group #%llu\n"), (unsigned long long)rg_count); memset(count, 0, sizeof(count)); rgp = (struct rgrp_tree *)n; rg_count++; /* Compare the bitmaps and report the differences */ update_rgrp(sdp, rgp, count); } /* Fix up superblock info based on this - don't think there's * anything to do here... */ return FSCK_OK; } gfs2-utils/gfs2/fsck/rgrepair.c0000664000175000017500000010243012144433405015275 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include #include #define _(String) gettext(String) #include "libgfs2.h" #include "osi_list.h" #include "fsck.h" int rindex_modified = FALSE; struct special_blocks false_rgrps; #define BAD_RG_PERCENT_TOLERANCE 11 #define AWAY_FROM_BITMAPS 0x1000 #define ri_equal(ondisk, expected, field) (ondisk.field == expected.field) #define ri_compare(rg, ondisk, expected, field, fmt, type) \ if (ondisk.field != expected.field) { \ log_warn( _("rindex #%d " #field " discrepancy: index 0x%" \ fmt " != expected: 0x%" fmt "\n"), \ rg + 1, (type)ondisk.field, (type)expected.field); \ ondisk.field = expected.field; \ rindex_modified = TRUE; \ } /* * find_journal_entry_rgs - find all RG blocks within all journals * * Since Resource Groups (RGs) are journaled, it is not uncommon for them * to appear inside a 
/*
 * find_journaled_rgs - find all RG blocks within all journals
 *
 * Since Resource Groups (RGs) are journaled, it is not uncommon for them
 * to appear inside a journal.  But if there is severe damage to the rindex
 * file or some of the RGs, we may need to hunt and peck for RGs and in that
 * case, we don't want to mistake these blocks that look just like a real
 * RG for a real RG block.  These are "fake" RGs that need to be ignored for
 * the purposes of finding where things are.
 *
 * Every journal block whose metadata header identifies it as an RG is
 * recorded in the file-level false_rgrps list.
 */
static void find_journaled_rgs(struct gfs2_sbd *sdp)
{
	int j, new = 0;
	unsigned int jblocks;
	uint64_t b, dblock;
	uint32_t extlen;
	struct gfs2_inode *ip;
	struct gfs2_buffer_head *bh;

	osi_list_init(&false_rgrps.list);
	for (j = 0; j < sdp->md.journals; j++) {
		log_debug( _("Checking for rgrps in journal%d.\n"), j);
		ip = sdp->md.journal[j];
		/* Journal length in file system blocks */
		jblocks = ip->i_di.di_size / sdp->sd_sb.sb_bsize;
		for (b = 0; b < jblocks; b++) {
			/* Map logical journal block b to its disk block */
			block_map(ip, b, &new, &dblock, &extlen, 0);
			if (!dblock)
				break;
			bh = bread(sdp, dblock);
			/* A zero return from gfs2_check_meta() is treated as
			 * "this block is an RG header" */
			if (!gfs2_check_meta(bh, GFS2_METATYPE_RG)) {
				log_debug( _("False rgrp found at block 0x%llx\n"),
					  (unsigned long long)dblock);
				gfs2_special_set(&false_rgrps, dblock);
			}
			brelse(bh);
		}
	}
}

/*
 * is_false_rg - was this block recorded as an RG look-alike in a journal?
 *
 * Returns 1 if @block is on the false_rgrps list, 0 otherwise.
 */
static int is_false_rg(uint64_t block)
{
	if (blockfind(&false_rgrps, block))
		return 1;
	return 0;
}

/*
 * find_shortest_rgdist - hunt and peck for the shortest distance between RGs.
 *
 * Sample several of them because an RG that's been blasted may
 * look like twice the distance.  If we can find 6 of them, that
 * should be enough to figure out the correct layout.
 * This also figures out first_rg_dist since that's always different.
 *
 * On return *initial_first_rg_dist holds the starting value used for the
 * first-RG distance (sb_addr + 1) and *first_rg_dist the measured or
 * corrected distance from the first RG to the second.
 *
 * Returns the shortest distance seen between two consecutive RGs, or the
 * device length if no second RG was found.
 */
static uint64_t find_shortest_rgdist(struct gfs2_sbd *sdp,
				     uint64_t *initial_first_rg_dist,
				     uint64_t *first_rg_dist)
{
	uint64_t blk, block_of_last_rg, shortest_dist_btwn_rgs;
	struct gfs2_buffer_head *bh;
	int number_of_rgs = 0;
	struct gfs2_rindex buf, tmpndx;

	/* Figure out if there are any RG-looking blocks in the journal we
	   need to ignore. */
	find_journaled_rgs(sdp);

	*initial_first_rg_dist = *first_rg_dist = sdp->sb_addr + 1;
	block_of_last_rg = sdp->sb_addr + 1;
	/* Start with the largest possible distance and shrink from there */
	shortest_dist_btwn_rgs = sdp->device.length;

	for (blk = sdp->sb_addr + 1;
	     blk < sdp->device.length && number_of_rgs < 6;
	     blk++) {
		bh = bread(sdp, blk);
		/* The block right after the superblock is always counted as
		 * the first RG, whatever its header says */
		if (((blk == sdp->sb_addr + 1) ||
		    (!gfs2_check_meta(bh, GFS2_METATYPE_RG))) &&
		    !is_false_rg(blk)) {
			log_debug( _("rgrp found at block 0x%llx\n"),
				  (unsigned long long)blk);
			if (blk > sdp->sb_addr + 1) {
				uint64_t rgdist;

				rgdist = blk - block_of_last_rg;
				/* No newline here: the line is completed by
				 * one of the two log_debug calls below */
				log_debug("dist 0x%llx = 0x%llx - 0x%llx",
					  (unsigned long long)rgdist,
					  (unsigned long long)blk,
					  (unsigned long long)block_of_last_rg);
				/* ----------------------------------------- */
				/* We found an RG.  Set first_rg_dist if it  */
				/* still holds its initial value, i.e. this  */
				/* is the first distance measured.  The      */
				/* first rg distance is different from the   */
				/* rest because of the superblock and 64K    */
				/* dead space.                               */
				/* ----------------------------------------- */
				if (*first_rg_dist == *initial_first_rg_dist)
					*first_rg_dist = rgdist;
				if (rgdist < shortest_dist_btwn_rgs) {
					shortest_dist_btwn_rgs = rgdist;
					log_debug( _("(shortest so far)\n"));
				}
				else
					log_debug("\n");
			}
			block_of_last_rg = blk;
			number_of_rgs++;
			blk += 250; /* skip ahead for performance */
		}
		brelse(bh);
	}
	/* -------------------------------------------------------------- */
	/* Sanity-check our first_rg_dist. If RG #2 got nuked, the        */
	/* first_rg_dist would measure from #1 to #3, which would be bad. */
	/* We need to take remedial measures to fix it (from the index).  */
	/* -------------------------------------------------------------- */
	log_debug( _("First rgrp distance: 0x%llx\n"),
		  (unsigned long long)*first_rg_dist);
	log_debug( _("Distance between rgrps: 0x%llx\n"),
		  (unsigned long long)shortest_dist_btwn_rgs);
	/* Suspicious if the first distance is at least 25% larger than the
	 * shortest distance between the other RGs */
	if (*first_rg_dist >= shortest_dist_btwn_rgs +
	    (shortest_dist_btwn_rgs / 4)) {
		/* read in the second RG index entry for this subd.
		 * (both trailing arguments equal sizeof(struct gfs2_rindex),
		 * i.e. one entry read one entry into the file) */
		gfs2_readi(sdp->md.riinode, (char *)&buf,
			   sizeof(struct gfs2_rindex),
			   sizeof(struct gfs2_rindex));
		gfs2_rindex_in(&tmpndx, (char *)&buf);
		if (tmpndx.ri_addr > sdp->sb_addr + 1) { /* sanity check */
			log_warn( _("rgrp 2 is damaged: getting dist from index: "));
			*first_rg_dist = tmpndx.ri_addr - (sdp->sb_addr + 1);
			log_warn("0x%llx\n", (unsigned long long)*first_rg_dist);
		}
		else {
			log_warn( _("rgrp index 2 is damaged: extrapolating dist: "));
			/* NOTE(review): divides by sdp->rgrps - confirm it
			 * cannot be zero when this path is reached. */
			*first_rg_dist = sdp->device.length -
				(sdp->rgrps - 1) *
				(sdp->device.length / sdp->rgrps);
			log_warn("0x%llx\n", (unsigned long long)*first_rg_dist);
		}
		log_debug( _("Adjusted first rgrp distance: 0x%llx\n"),
			  (unsigned long long)*first_rg_dist);
	} /* if first RG distance is outside the 25% tolerance */

	gfs2_special_free(&false_rgrps);
	return shortest_dist_btwn_rgs;
}
*/ gfs2_readi(sdp->md.riinode, (char *)&buf, sizeof(struct gfs2_rindex), sizeof(struct gfs2_rindex)); gfs2_rindex_in(&tmpndx, (char *)&buf); if (tmpndx.ri_addr > sdp->sb_addr + 1) { /* sanity check */ log_warn( _("rgrp 2 is damaged: getting dist from index: ")); *first_rg_dist = tmpndx.ri_addr - (sdp->sb_addr + 1); log_warn("0x%llx\n", (unsigned long long)*first_rg_dist); } else { log_warn( _("rgrp index 2 is damaged: extrapolating dist: ")); *first_rg_dist = sdp->device.length - (sdp->rgrps - 1) * (sdp->device.length / sdp->rgrps); log_warn("0x%llx\n", (unsigned long long)*first_rg_dist); } log_debug( _("Adjusted first rgrp distance: 0x%llx\n"), (unsigned long long)*first_rg_dist); } /* if first RG distance is within tolerance */ gfs2_special_free(&false_rgrps); return shortest_dist_btwn_rgs; } /* * count_usedspace - count the used bits in a rgrp bitmap buffer */ static uint64_t count_usedspace(struct gfs2_sbd *sdp, int first, struct gfs2_buffer_head *bh) { int off, x, y, bytes_to_check; uint32_t rg_used = 0; unsigned int state; /* Count up the free blocks in the bitmap */ if (first) { if (sdp->gfs1) off = sizeof(struct gfs_rgrp); else off = sizeof(struct gfs2_rgrp); } else off = sizeof(struct gfs2_meta_header); bytes_to_check = sdp->bsize - off; for (x = 0; x < bytes_to_check; x++) { unsigned char *byte; byte = (unsigned char *)&bh->b_data[off + x]; if (*byte == 0x55) { rg_used += GFS2_NBBY; continue; } if (*byte == 0x00) continue; for (y = 0; y < GFS2_NBBY; y++) { state = (*byte >> (GFS2_BIT_SIZE * y)) & GFS2_BIT_MASK; if (state == GFS2_BLKST_FREE || state == GFS2_BLKST_UNLINKED) continue; rg_used++; } } return rg_used; } /* * find_next_rgrp_dist - find the distance to the next rgrp * * This function is only called if the rgrps are determined to be on uneven * boundaries. In a normal gfs2 file system, after mkfs.gfs2, all the * rgrps but the first and second one will be the same distance from the * previous rgrp. 
(The first rgrp will predictably be after the superblock * and the second one will be adjusted based on the number 64KB skipped * at the start of the file system.) The only way we can deviate from that * pattern is if the user did gfs_grow on a gfs1 file system, then converted * it to gfs2 using gfs2_convert. * * This function finds the distance to the next rgrp for these cases. */ static uint64_t find_next_rgrp_dist(struct gfs2_sbd *sdp, uint64_t blk, struct rgrp_tree *prevrgd) { struct osi_node *n, *next = NULL; uint64_t rgrp_dist = 0, used_blocks, block, next_block, twogigs; struct rgrp_tree *rgd = NULL, *next_rgd; struct gfs2_buffer_head *bh; struct gfs2_meta_header mh; int first, length, b, found, mega_in_blocks; uint32_t free_blocks; for (n = osi_first(&sdp->rgtree); n; n = next) { next = osi_next(n); rgd = (struct rgrp_tree *)n; if (rgd->ri.ri_addr == blk) break; } if (rgd && n && osi_next(n) && rgd->ri.ri_addr == blk) { n = osi_next(n); next_rgd = (struct rgrp_tree *)n; rgrp_dist = next_rgd->ri.ri_addr - rgd->ri.ri_addr; return rgrp_dist; } mega_in_blocks = (1024 * 1024) / sdp->bsize; twogigs = 2048 * mega_in_blocks; /* Unfortunately, if we fall through to here we can't trust the rindex. So we have to analyze the current rgrp to figure out the bare minimum block number where it ends. If we don't have rindex, all we know about this rgrp is what's on disk: its rg_free. If we analyze the rgrp's bitmap and the bitmaps that follow, we can figure out how many bits are used. If we add rg_free, we get the total number of blocks this rgrp represents. After that should be the next rgrp, but it may skip a few blocks (hopefully no more than 4). 
*/ used_blocks = 0; length = 0; block = prevrgd->ri.ri_addr; first = 1; found = 0; while (1) { if (block >= sdp->device.length) break; if (block >= prevrgd->ri.ri_addr + twogigs) break; bh = bread(sdp, block); gfs2_meta_header_in(&mh, bh); if ((mh.mh_magic != GFS2_MAGIC) || (first && mh.mh_type != GFS2_METATYPE_RG) || (!first && mh.mh_type != GFS2_METATYPE_RB)) { brelse(bh); break; } if (first) { struct gfs2_rgrp *rg; rg = (struct gfs2_rgrp *)bh->b_data; free_blocks = be32_to_cpu(rg->rg_free); } used_blocks += count_usedspace(sdp, first, bh); first = 0; block++; length++; brelse(bh); /* Check if this distance points to an rgrp: We have to look for blocks that resemble rgrps and bitmaps. If they do, we need to count blocks used and free and see if adding that number of free blocks accounts for the next rgrp we find. Otherwise, you could have a length of 6 with additional user blocks that just happen to look like bitmap blocks. Count them all as bitmaps and you'll be hopelessly lost. */ rgrp_dist = used_blocks + free_blocks + length; next_block = prevrgd->ri.ri_addr + rgrp_dist; /* Now we account for block rounding done by mkfs.gfs2 */ for (b = 0; b <= length + GFS2_NBBY; b++) { if (next_block >= sdp->device.length) break; bh = bread(sdp, next_block + b); gfs2_meta_header_in(&mh, bh); brelse(bh); if (mh.mh_magic == GFS2_MAGIC) { if (mh.mh_type == GFS2_METATYPE_RG) { found = 1; break; } /* if the first thing we find is a bitmap, there must be a damaged rgrp on the previous block. */ if (mh.mh_type == GFS2_METATYPE_RB) { found = 1; rgrp_dist--; break; } } rgrp_dist++; } if (found) { block = next_block; log_info( _("rgrp found at 0x%llx, length=%d, " "used=%llu, free=%d\n"), prevrgd->ri.ri_addr, length, (unsigned long long)used_blocks, free_blocks); break; } } return rgrp_dist; } /* * hunt_and_peck - find the distance to the next rgrp * * This function is only called if the rgrps are determined to be on uneven * boundaries, and also corrupt. 
So we have to go out searching for one. */
/*
 * Parameters:
 *   sdp       - superblock descriptor (block size and fssize are read here)
 *   blk       - address of the damaged rgrp we are standing on
 *   prevrgd   - rgrp whose address the brute-force scan is measured from
 *   last_bump - distance used for the previous rgrp; tried first as a guess
 *
 * Returns one of:
 *   - sdp->fssize - blk, when gfs2_check_range() rejects blk + last_bump
 *     (presumably "out of range"; verify against libgfs2),
 *   - last_bump, when the block at blk + last_bump is an rgrp header,
 *   - otherwise the scanned distance plus last_meg.
 */
static uint64_t hunt_and_peck(struct gfs2_sbd *sdp, uint64_t blk,
			      struct rgrp_tree *prevrgd, uint64_t last_bump)
{
	uint64_t rgrp_dist = 0, block, twogigs, last_block, last_meg;
	struct gfs2_buffer_head *bh;
	struct gfs2_meta_header mh;
	int b, mega_in_blocks;

	/* Skip ahead the previous amount: we might get lucky.
	   If we're close to the end of the device, take the rest. */
	if (gfs2_check_range(sdp, blk + last_bump))
		return sdp->fssize - blk;

	bh = bread(sdp, blk + last_bump);
	gfs2_meta_header_in(&mh, bh);
	brelse(bh);
	if (mh.mh_magic == GFS2_MAGIC && mh.mh_type == GFS2_METATYPE_RG) {
		log_info( _("rgrp found at 0x%llx, length=%lld\n"),
			  (unsigned long long)blk + last_bump,
			  (unsigned long long)last_bump);
		return last_bump;
	}

	rgrp_dist = AWAY_FROM_BITMAPS; /* Get away from any bitmaps
					  associated with the previous rgrp */
	block = prevrgd->ri.ri_addr + rgrp_dist;
	/* Now we account for block rounding done by mkfs.gfs2.  A rgrp can
	   be at most 2GB in size, so that's where we call it. We do somewhat
	   obscure math here to avoid integer overflows. */
	mega_in_blocks = (1024 * 1024) / sdp->bsize;
	twogigs = 2048 * mega_in_blocks;
	if (block + twogigs <= sdp->fssize) {
		last_block = twogigs;
		last_meg = 0;
	} else {
		/* There won't be a rgrp in the last megabyte. */
		/* NOTE(review): unsigned subtraction; if fewer than
		   mega_in_blocks blocks remain after 'block' this wraps to a
		   huge value -- confirm callers cannot get here that close
		   to the end of the fs. */
		last_block = sdp->fssize - block - mega_in_blocks;
		last_meg = mega_in_blocks;
	}
	/* NOTE(review): 'block' already includes AWAY_FROM_BITMAPS and 'b'
	   also starts at AWAY_FROM_BITMAPS, so the first block read is
	   prevrgd's address + 2 * AWAY_FROM_BITMAPS while rgrp_dist starts
	   counting from AWAY_FROM_BITMAPS -- confirm this offset is
	   intended.  Also note 'b' is an int compared against a uint64_t. */
	for (b = AWAY_FROM_BITMAPS; b < last_block; b++) {
		bh = bread(sdp, block + b);
		gfs2_meta_header_in(&mh, bh);
		brelse(bh);
		if (mh.mh_magic == GFS2_MAGIC) {
			if (mh.mh_type == GFS2_METATYPE_RG)
				break;
			/* if the first thing we find is a bitmap, there must
			   be a damaged rgrp on the previous block. */
			if (mh.mh_type == GFS2_METATYPE_RB) {
				rgrp_dist--;
				break;
			}
		}
		rgrp_dist++;
	}
	/* last_meg is nonzero only when the scan window was clipped to the
	   end of the file system above. */
	return rgrp_dist + last_meg;
}

/*
 * gfs2_rindex_rebuild - rebuild a corrupt Resource Group (RG) index manually
 *                        where trust_lvl == distrust
 *
 * If this routine is called, it means we have RGs in odd/unexpected places,
 * and there is a corrupt RG or RG index entry.  It also means we can't trust
 * the RG index to be sane, and the RGs don't agree with how mkfs would have
 * built them by default.  So we have no choice but to go through and count
 * them by hand.  We've tried twice to recover the RGs and RG index, and
 * failed, so this is our last chance to remedy the situation.
 *
 * This routine tries to minimize performance impact by:
 * 1. Skipping through the filesystem at known increments when possible.
 * 2. Shuffle through every block when RGs are not found at the predicted
 *    locations.
 *
 * Note: A GFS2 filesystem differs from a GFS1 file system in that there will
 * only be ONE chunk (i.e. no artificial subdevices on either size of the
 * journals).  The journals and even the rindex are kept as part of the file
 * system, so we need to rebuild that information by hand.  Also, with GFS1,
 * the different chunks ("subdevices") could have different RG sizes, which
 * made for quite a mess when trying to recover RGs.  GFS2 always uses the
 * same RG size determined by the original mkfs, so recovery is easier.
 *
 * If "gfs_grow" is specified the file system was most likely converted
 * from gfs1 to gfs2 after a gfs_grow operation.  In that case, the rgrps
 * will not be on predictable boundaries.
*/
/*
 * Parameters:
 *   sdp      - superblock descriptor; the rebuilt tree is put in sdp->rgcalc
 *   num_rgs  - out: number of rgrps inserted into sdp->rgcalc
 *   gfs_grow - nonzero: do not assume uniform spacing; the distance to each
 *              following rgrp comes from find_next_rgrp_dist() (rgrp header
 *              found) or hunt_and_peck() (rgrp header missing)
 *
 * Returns 0 on success, -1 if rgrp_insert() fails or once a fifth missing or
 * damaged rgrp is encountered.  Nothing is freed on the error paths here.
 */
static int gfs2_rindex_rebuild(struct gfs2_sbd *sdp, int *num_rgs,
			       int gfs_grow)
{
	struct osi_node *n, *next = NULL;
	struct gfs2_buffer_head *bh;
	uint64_t shortest_dist_btwn_rgs;
	uint64_t blk;
	uint64_t fwd_block, block_bump;
	uint64_t first_rg_dist, initial_first_rg_dist;
	struct rgrp_tree *calc_rgd, *prev_rgd;
	int number_of_rgs, rgi;
	int rg_was_fnd = FALSE, corrupt_rgs = 0, bitmap_was_fnd;

	sdp->rgcalc.osi_node = NULL;
	initial_first_rg_dist = first_rg_dist = sdp->sb_addr + 1;
	shortest_dist_btwn_rgs = find_shortest_rgdist(sdp,
						      &initial_first_rg_dist,
						      &first_rg_dist);
	number_of_rgs = 0;
	/* -------------------------------------------------------------- */
	/* Now go through the RGs and verify their integrity, fixing as   */
	/* needed when corruption is encountered.                         */
	/* -------------------------------------------------------------- */
	prev_rgd = NULL;
	block_bump = first_rg_dist;
	blk = sdp->sb_addr + 1;
	/* NOTE(review): '<=' lets blk == device.length through to bread();
	   the bitmap loop below uses '<' for the same bound -- confirm. */
	while (blk <= sdp->device.length) {
		log_debug( _("Block 0x%llx\n"), (unsigned long long)blk);
		bh = bread(sdp, blk);
		rg_was_fnd = (!gfs2_check_meta(bh, GFS2_METATYPE_RG));
		brelse(bh);
		/* Allocate a new RG and index. */
		calc_rgd = rgrp_insert(&sdp->rgcalc, blk);
		if (!calc_rgd) {
			log_crit( _("Can't allocate memory for rgrp repair.\n"));
			return -1;
		}
		/* Start with just the header block; bitmaps are counted
		   below. */
		calc_rgd->ri.ri_length = 1;
		if (!rg_was_fnd) { /* if not an RG */
			/* ------------------------------------------------- */
			/* This SHOULD be an RG but isn't.                   */
			/* ------------------------------------------------- */
			corrupt_rgs++;
			if (corrupt_rgs < 5)
				log_debug( _("Missing or damaged rgrp at block "
					  "%llu (0x%llx)\n"),
					  (unsigned long long)blk,
					  (unsigned long long)blk);
			else {
				log_crit( _("Error: too many missing or "
					    "damaged rgrps using this method. "
					    "Time to try another method.\n"));
				return -1;
			}
		}
		/* ------------------------------------------------ */
		/* Now go through and count the bitmaps for this RG */
		/* ------------------------------------------------ */
		bitmap_was_fnd = FALSE;
		for (fwd_block = blk + 1; fwd_block < sdp->device.length;
		     fwd_block++) {
			bh = bread(sdp, fwd_block);
			bitmap_was_fnd = (!gfs2_check_meta(bh, GFS2_METATYPE_RB));
			brelse(bh);
			if (bitmap_was_fnd) /* if a bitmap */
				calc_rgd->ri.ri_length++;
			else
				break; /* end of bitmap, so call it quits. */
		} /* for subsequent bitmaps */

		gfs2_compute_bitstructs(sdp->sd_sb.sb_bsize, calc_rgd);
		calc_rgd->ri.ri_data0 = calc_rgd->ri.ri_addr +
			calc_rgd->ri.ri_length;
		/* The size of the previous rgrp is only known now that the
		   distance to this one (block_bump) has been established, so
		   its allocation fields are filled in one iteration late. */
		if (prev_rgd) {
			uint32_t rgblocks;

			prev_rgd->ri.ri_length = rgblocks2bitblocks(sdp->bsize, block_bump, &rgblocks);
			prev_rgd->ri.ri_data = rgblocks;
			prev_rgd->ri.ri_data0 = prev_rgd->ri.ri_addr +
				prev_rgd->ri.ri_length;
			/* Round ri_data down to a multiple of GFS2_NBBY so
			   that ri_bitbytes divides exactly. */
			prev_rgd->ri.ri_data -= prev_rgd->ri.ri_data %
				GFS2_NBBY;
			prev_rgd->ri.ri_bitbytes = prev_rgd->ri.ri_data /
				GFS2_NBBY;
			log_debug( _("Prev ri_data set to: %lx.\n"),
				  (unsigned long)prev_rgd->ri.ri_data);
		}
		number_of_rgs++;
		if (rg_was_fnd)
			log_info( _(" rgrp %d at block 0x%llx intact"),
				  number_of_rgs, (unsigned long long)blk);
		else
			log_warn( _("* rgrp %d at block 0x%llx *** DAMAGED ***"),
				  number_of_rgs, (unsigned long long)blk);
		prev_rgd = calc_rgd;
		/*
		 * Figure out where our next rgrp should be.
		 */
		if (blk == sdp->sb_addr + 1)
			block_bump = first_rg_dist;
		else if (!gfs_grow) {
			block_bump = shortest_dist_btwn_rgs;
			/* if we have uniformly-spaced rgrps, there may be
			   some wasted space at the end of the device.
			   Since we don't want to create a short rgrp and
			   break our uniformity, just quit here. */
			if (blk + (2 * block_bump) > sdp->device.length)
				break;
		} else if (rg_was_fnd)
			block_bump = find_next_rgrp_dist(sdp, blk, prev_rgd);
		else
			block_bump = hunt_and_peck(sdp, blk, prev_rgd,
						   block_bump);
		/* Finish the log line started by the "rgrp %d at block"
		   message above. */
		if (block_bump != 1) {
			if (rg_was_fnd)
				log_info( _(" [length 0x%llx]\n"),
					  (unsigned long long)block_bump);
			else
				log_warn( _(" [length 0x%llx]\n"),
					  (unsigned long long)block_bump);
		} else {
			log_warn("\n");
		}
		blk += block_bump;
	} /* for each rg block */
	/* ----------------------------------------------------------------- */
	/* If we got to the end of the fs, we still need to fix the          */
	/* allocation information for the very last RG.                      */
	/* ----------------------------------------------------------------- */
	if (prev_rgd && !prev_rgd->ri.ri_data) {
		uint32_t rgblocks;

		prev_rgd->ri.ri_length = rgblocks2bitblocks(sdp->bsize, block_bump, &rgblocks);
		prev_rgd->ri.ri_data0 = prev_rgd->ri.ri_addr +
			prev_rgd->ri.ri_length;
		prev_rgd->ri.ri_data = rgblocks;
		prev_rgd->ri.ri_data -= prev_rgd->ri.ri_data % GFS2_NBBY;
		prev_rgd->ri.ri_bitbytes = prev_rgd->ri.ri_data / GFS2_NBBY;
		log_debug( _("Prev ri_data set to: %lx.\n"),
			  (unsigned long)prev_rgd->ri.ri_data);
		prev_rgd = NULL; /* make sure we don't use it later */
	}
	/* ---------------------------------------------- */
	/* Now dump out the information (if verbose mode) */
	/* ---------------------------------------------- */
	log_debug( _("rindex rebuilt as follows:\n"));
	for (n = osi_first(&sdp->rgcalc), rgi = 0; n; n = next, rgi++) {
		next = osi_next(n);
		calc_rgd = (struct rgrp_tree *)n;
		log_debug("%d: 0x%llx / %x / 0x%llx"
			  " / 0x%x / 0x%x\n", rgi + 1,
			  (unsigned long long)calc_rgd->ri.ri_addr,
			  calc_rgd->ri.ri_length,
			  calc_rgd->ri.ri_data0, calc_rgd->ri.ri_data,
			  calc_rgd->ri.ri_bitbytes);
	}
	*num_rgs = number_of_rgs;
	return 0;
}

/*
 * debug_print_rgrps - when sdp->debug is set, log the start and length of
 *                     every entry in the given rgrp tree; otherwise do
 *                     nothing.
 */
static void debug_print_rgrps(struct gfs2_sbd *sdp, struct osi_root *rgtree)
{
	struct osi_node *n, *next;
	struct rgrp_tree *rl;

	if (sdp->debug) {
		log_info("\n");

		for (n = osi_first(rgtree); n; n = next) {
			next = osi_next(n);
			rl = (struct rgrp_tree *)n;
			log_info("rg_o = %llu, rg_l = %llu\n",
				 (unsigned long long)rl->start,
				 (unsigned long long)rl->length);
		}
	}
}

/*
 * gfs2_rindex_calculate - calculate what the rindex should look like
 *                          in a perfect world (trust_lvl == open_minded)
 *
 * Calculate what the rindex should look like,
 * so we can later check if all RG index entries are sane.
 * This is a lot easier for gfs2 because we can just call the same libgfs2
 * functions used by mkfs.
 *
 * Returns: 0 on success, -1 on failure
 * Sets:    sdp->rglist to a linked list of fsck_rgrp structs representing
 *          what we think the rindex should really look like.
 *
 * NOTE(review): as written below, the function only ever returns 0 and the
 * result is built in sdp->rgcalc; the "-1" and "sdp->rglist" wording above
 * looks stale.
 */
static int gfs2_rindex_calculate(struct gfs2_sbd *sdp, int *num_rgs)
{
	uint64_t num_rgrps = 0;

	/* ----------------------------------------------------------------- */
	/* Calculate how many RGs there are supposed to be based on the      */
	/* rindex filesize.  Remember that our trust level is open-minded    */
	/* here.  If the filesize of the rindex file is not a multiple of    */
	/* our rindex structures, then something's wrong and we can't trust  */
	/* the index.                                                        */
	/* ----------------------------------------------------------------- */
	*num_rgs = sdp->md.riinode->i_di.di_size / sizeof(struct gfs2_rindex);

	sdp->rgcalc.osi_node = NULL;
	fix_device_geometry(sdp);

	/* Try all possible rgrp sizes: 2048, 1024, 512, 256, 128, 64, 32 */
	/* NOTE(review): if no size matches, the loop exits with sdp->rgsize
	   halved once more (below 32) and the layout is still computed
	   with it -- confirm that is acceptable. */
	for (sdp->rgsize = GFS2_DEFAULT_RGSIZE; sdp->rgsize >= 32;
	     sdp->rgsize /= 2) {
		num_rgrps = how_many_rgrps(sdp, &sdp->device, TRUE);
		if (num_rgrps == *num_rgs) {
			log_info(_("rgsize must be: %lld (0x%llx)\n"),
				 (unsigned long long)sdp->rgsize,
				 (unsigned long long)sdp->rgsize);
			break;
		}
	}
	/* Compute the default resource group layout as mkfs would have done */
	compute_rgrp_layout(sdp, &sdp->rgcalc, TRUE);
	debug_print_rgrps(sdp, &sdp->rgcalc);
	build_rgrps(sdp, FALSE); /* FALSE = calc but don't write to disk. */
	log_debug( _("fs_total_size = 0x%llx blocks.\n"),
		  (unsigned long long)sdp->device.length);
	log_warn( _("L3: number of rgs in the index = %d.\n"), *num_rgs);
	return 0;
}

/*
 * rewrite_rg_block - rewrite ("fix") a buffer with rg or bitmap data
 * returns: 0 if the rg was repaired, otherwise 1
 *
 * errblock is the address of the bad block; its offset from the start of
 * the rgrp decides whether an rgrp header (offset 0) or a bitmap header
 * (any other offset) is written.  Nothing is written unless query() returns
 * nonzero.
 */
static int rewrite_rg_block(struct gfs2_sbd *sdp, struct rgrp_tree *rg,
			    uint64_t errblock)
{
	/* Index of the bad block within this rgrp; 0 is the header block. */
	int x = errblock - rg->ri.ri_addr;
	const char *typedesc = x ? "GFS2_METATYPE_RB" : "GFS2_METATYPE_RG";

	log_err( _("Block #%lld (0x%llx) (%d of %d) is not %s.\n"),
		 (unsigned long long)rg->ri.ri_addr + x,
		 (unsigned long long)rg->ri.ri_addr + x,
		 (int)x+1, (int)rg->ri.ri_length, typedesc);
	if (query( _("Fix the Resource Group? (y/n)"))) {
		log_err( _("Attempting to repair the rgrp.\n"));
		/* NOTE(review): the bread() result is used unchecked. */
		rg->bh[x] = bread(sdp, rg->ri.ri_addr + x);
		if (x) {
			/* NOTE(review): only three fields of mh are assigned;
			   confirm gfs2_meta_header_out_bh() does not emit
			   the remaining (uninitialised) ones. */
			struct gfs2_meta_header mh;

			mh.mh_magic = GFS2_MAGIC;
			mh.mh_type = GFS2_METATYPE_RB;
			mh.mh_format = GFS2_FORMAT_RB;
			gfs2_meta_header_out_bh(&mh, rg->bh[x]);
		} else {
			/* Rebuild the header from scratch and mark every
			   data block in the rgrp as free. */
			if (sdp->gfs1)
				memset(&rg->rg, 0, sizeof(struct gfs_rgrp));
			else
				memset(&rg->rg, 0, sizeof(struct gfs2_rgrp));
			rg->rg.rg_header.mh_magic = GFS2_MAGIC;
			rg->rg.rg_header.mh_type = GFS2_METATYPE_RG;
			rg->rg.rg_header.mh_format = GFS2_FORMAT_RG;
			rg->rg.rg_free = rg->ri.ri_data;
			if (sdp->gfs1)
				gfs_rgrp_out((struct gfs_rgrp *)&rg->rg,
					     rg->bh[x]);
			else
				gfs2_rgrp_out_bh(&rg->rg, rg->bh[x]);
		}
		brelse(rg->bh[x]);
		rg->bh[x] = NULL;
		return 0;
	}
	return 1;
}

/*
 * expect_rindex_sanity - the rindex file seems trustworthy, so use those
 *                        values as our expected values and assume the
 *                        damage is only to the rgrps themselves.
*/ static int expect_rindex_sanity(struct gfs2_sbd *sdp, int *num_rgs) { struct osi_node *n, *next = NULL; struct rgrp_tree *rgd, *exp; *num_rgs = sdp->md.riinode->i_di.di_size / sizeof(struct gfs2_rindex) ; for (n = osi_first(&sdp->rgtree); n; n = next) { next = osi_next(n); rgd = (struct rgrp_tree *)n; exp = rgrp_insert(&sdp->rgcalc, rgd->ri.ri_addr); if (exp == NULL) { fprintf(stderr, "Out of memory in %s\n", __FUNCTION__); exit(-1); } exp->start = rgd->start; exp->length = rgd->length; memcpy(&exp->ri, &rgd->ri, sizeof(exp->ri)); memcpy(&exp->rg, &rgd->rg, sizeof(exp->rg)); exp->bits = NULL; exp->bh = NULL; gfs2_compute_bitstructs(sdp->sd_sb.sb_bsize, exp); } sdp->rgrps = *num_rgs; return 0; } /* * rg_repair - try to repair a damaged rg index (rindex) * trust_lvl - This is how much we trust the rindex file. * blind_faith means we take the rindex at face value. * open_minded means it might be okay, but we should verify it. * distrust means it's not to be trusted, so we should go to * greater lengths to build it from scratch. * indignation means we have corruption, but the file system * was converted from GFS via gfs2_convert, and its rgrps are * not on nice boundaries thanks to previous gfs_grow ops. Lovely. */ int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count, int *sane) { struct osi_node *n, *next = NULL, *e, *enext; int error, discrepancies, percent; int calc_rg_count = 0, rgcount_from_index, rg; struct gfs2_rindex buf; if (trust_lvl == blind_faith) return 0; if (trust_lvl == ye_of_little_faith) { /* if rindex seems sane */ /* Don't free previous incarnations in memory, if any. 
* We need them to copy in the next function: * gfs2_rgrp_free(&sdp->rglist); */ if (!(*sane)) { log_err(_("The rindex file does not meet our " "expectations.\n")); return -1; } error = expect_rindex_sanity(sdp, &calc_rg_count); if (error) { gfs2_rgrp_free(&sdp->rgcalc); return error; } } else if (trust_lvl == open_minded) { /* If we can't trust RG index */ /* Free previous incarnations in memory, if any. */ gfs2_rgrp_free(&sdp->rgtree); /* Calculate our own RG index for comparison */ error = gfs2_rindex_calculate(sdp, &calc_rg_count); if (error) { /* If calculated RGs don't match the fs */ gfs2_rgrp_free(&sdp->rgcalc); return -1; } } else if (trust_lvl == distrust) { /* If we can't trust RG index */ /* Free previous incarnations in memory, if any. */ gfs2_rgrp_free(&sdp->rgtree); error = gfs2_rindex_rebuild(sdp, &calc_rg_count, 0); if (error) { log_crit( _("Error rebuilding rgrp list.\n")); gfs2_rgrp_free(&sdp->rgcalc); return -1; } sdp->rgrps = calc_rg_count; } else if (trust_lvl == indignation) { /* If we can't trust anything */ /* Free previous incarnations in memory, if any. 
*/ gfs2_rgrp_free(&sdp->rgtree); error = gfs2_rindex_rebuild(sdp, &calc_rg_count, 1); if (error) { log_crit( _("Error rebuilding rgrp list.\n")); gfs2_rgrp_free(&sdp->rgcalc); return -1; } sdp->rgrps = calc_rg_count; } /* Read in the rindex */ sdp->rgtree.osi_node = NULL; /* Just to be safe */ rindex_read(sdp, 0, &rgcount_from_index, sane); if (sdp->md.riinode->i_di.di_size % sizeof(struct gfs2_rindex)) { log_warn( _("WARNING: rindex file is corrupt.\n")); gfs2_rgrp_free(&sdp->rgcalc); gfs2_rgrp_free(&sdp->rgtree); return -1; } log_warn( _("L%d: number of rgs expected = %lld.\n"), trust_lvl + 1, (unsigned long long)sdp->rgrps); if (calc_rg_count != sdp->rgrps) { log_warn( _("L%d: They don't match; either (1) the fs was " "extended, (2) an odd\n"), trust_lvl + 1); log_warn( _("L%d: rgrp size was used, or (3) we have a corrupt " "rg index.\n"), trust_lvl + 1); gfs2_rgrp_free(&sdp->rgcalc); gfs2_rgrp_free(&sdp->rgtree); return -1; } /* ------------------------------------------------------------- */ /* Now compare the rindex to what we think it should be. */ /* See how far off our expected values are. If too much, abort. */ /* The theory is: if we calculated the index to have 32 RGs and */ /* we have a large number that are completely wrong, we should */ /* abandon this method of recovery and try a better one. 
*/ /* ------------------------------------------------------------- */ discrepancies = 0; for (rg = 0, n = osi_first(&sdp->rgtree), e = osi_first(&sdp->rgcalc); n && e && !fsck_abort; rg++) { struct rgrp_tree *expected, *actual; next = osi_next(n); enext = osi_next(e); expected = (struct rgrp_tree *)e; actual = (struct rgrp_tree *)n; if (actual->ri.ri_addr < expected->ri.ri_addr) { n = next; discrepancies++; log_info(_("%d addr: 0x%llx < 0x%llx * mismatch\n"), rg + 1, actual->ri.ri_addr, expected->ri.ri_addr); continue; } else if (expected->ri.ri_addr < actual->ri.ri_addr) { e = enext; discrepancies++; log_info(_("%d addr: 0x%llx > 0x%llx * mismatch\n"), rg + 1, actual->ri.ri_addr, expected->ri.ri_addr); continue; } if (!ri_equal(actual->ri, expected->ri, ri_length) || !ri_equal(actual->ri, expected->ri, ri_data0) || !ri_equal(actual->ri, expected->ri, ri_data) || !ri_equal(actual->ri, expected->ri, ri_bitbytes)) { discrepancies++; log_info(_("%d addr: 0x%llx 0x%llx * has mismatch\n"), rg + 1, actual->ri.ri_addr, expected->ri.ri_addr); } n = next; e = enext; } if (rg) { /* Check to see if more than 2% of the rgrps are wrong. */ percent = (discrepancies * 100) / rg; if (percent > BAD_RG_PERCENT_TOLERANCE) { log_warn( _("Level %d didn't work. Too many " "discrepancies.\n"), trust_lvl + 1); log_warn( _("%d out of %d rgrps (%d percent) did not " "match what was expected.\n"), discrepancies, rg, percent); gfs2_rgrp_free(&sdp->rgcalc); gfs2_rgrp_free(&sdp->rgtree); return -1; } } /* ------------------------------------------------------------- */ /* Now compare the rindex to what we think it should be. */ /* Our rindex should be pretty predictable unless we've grown */ /* so look for index problems first before looking at the rgs. 
*/ /* ------------------------------------------------------------- */ for (rg = 0, n = osi_first(&sdp->rgtree), e = osi_first(&sdp->rgcalc); e && !fsck_abort; rg++) { struct rgrp_tree *expected, *actual; if (n) next = osi_next(n); enext = osi_next(e); expected = (struct rgrp_tree *)e; /* If we ran out of actual rindex entries due to rindex damage, fill in a new one with the expected values. */ if (!n) { /* end of actual rindex */ log_err( _("Entry missing from rindex: 0x%llx\n"), (unsigned long long)expected->ri.ri_addr); actual = rgrp_insert(&sdp->rgtree, expected->ri.ri_addr); if (!actual) { log_err(_("Out of memory!\n")); break; } rindex_modified = 1; } else { actual = (struct rgrp_tree *)n; ri_compare(rg, actual->ri, expected->ri, ri_addr, "llx", unsigned long long); ri_compare(rg, actual->ri, expected->ri, ri_length, "lx", unsigned long); ri_compare(rg, actual->ri, expected->ri, ri_data0, "llx", unsigned long long); ri_compare(rg, actual->ri, expected->ri, ri_data, "lx", unsigned long); ri_compare(rg, actual->ri, expected->ri, ri_bitbytes, "lx", unsigned long); } /* If we modified the index, write it back to disk. */ if (rindex_modified) { if (query( _("Fix the index? (y/n)"))) { gfs2_rindex_out(&expected->ri, (char *)&buf); gfs2_writei(sdp->md.riinode, (char *)&buf, rg * sizeof(struct gfs2_rindex), sizeof(struct gfs2_rindex)); actual->ri.ri_addr = expected->ri.ri_addr; actual->ri.ri_length = expected->ri.ri_length; actual->ri.ri_data0 = expected->ri.ri_data0; actual->ri.ri_data = expected->ri.ri_data; actual->ri.ri_bitbytes = expected->ri.ri_bitbytes; /* If our rindex was hosed, ri_length is bad */ /* Therefore, gfs2_compute_bitstructs might */ /* have malloced the wrong length for bitmap */ /* buffers. So we have to redo it. 
*/ if (actual->bits) { free(actual->bits); actual->bits = NULL; } } else log_err( _("rindex not fixed.\n")); gfs2_compute_bitstructs(sdp->sd_sb.sb_bsize, actual); rindex_modified = FALSE; } e = enext; if (n) n = next; } /* ------------------------------------------------------------- */ /* Read the real RGs and check their integrity. */ /* Now we can somewhat trust the rindex and the RG addresses, */ /* so let's read them in, check them and optionally fix them. */ /* ------------------------------------------------------------- */ for (rg = 0, n = osi_first(&sdp->rgtree); n && !fsck_abort; n = next, rg++) { struct rgrp_tree *rgd; uint64_t prev_err = 0, errblock; int i; next = osi_next(n); /* Now we try repeatedly to read in the rg. For every block */ /* we encounter that has errors, repair it and try again. */ i = 0; do { rgd = (struct rgrp_tree *)n; errblock = gfs2_rgrp_read(sdp, rgd); if (errblock) { if (errblock == prev_err) break; prev_err = errblock; rewrite_rg_block(sdp, rgd, errblock); } else { gfs2_rgrp_relse(rgd); break; } i++; } while (i < rgd->ri.ri_length); } *rg_count = rg; gfs2_rgrp_free(&sdp->rgcalc); gfs2_rgrp_free(&sdp->rgtree); return 0; } gfs2-utils/gfs2/fsck/target.mk0000664000175000017500000000011312110647577015142 0ustar andyandy $(eval $(call make-trans-binary,/sbin/fsck.gfs2,gfs2/libgfs2/libgfs2.a)) gfs2-utils/gfs2/fsck/util.c0000664000175000017500000005016612171730232014445 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include #include #include #include #define _(String) gettext(String) #include "libgfs2.h" #include "metawalk.h" #include "util.h" const char *reftypes[ref_types + 1] = {"data", "metadata", "extended attribute", "itself", "unimportant"}; void big_file_comfort(struct gfs2_inode *ip, uint64_t blks_checked) { static struct timeval tv; static uint32_t seconds = 0; static uint64_t percent, fsize, chksize; uint64_t one_percent = 0; int i, cs; const char *human_abbrev = " 
KMGTPE"; one_percent = ip->i_di.di_blocks / 100; if (blks_checked - last_reported_fblock < one_percent) return; last_reported_fblock = blks_checked; gettimeofday(&tv, NULL); if (!seconds) seconds = tv.tv_sec; if (tv.tv_sec == seconds) return; fsize = ip->i_di.di_size; for (i = 0; i < 6 && fsize > 1024; i++) fsize /= 1024; chksize = blks_checked * ip->i_sbd->bsize; for (cs = 0; cs < 6 && chksize > 1024; cs++) chksize /= 1024; seconds = tv.tv_sec; percent = (blks_checked * 100) / ip->i_di.di_blocks; log_notice( _("\rChecking %lld%c of %lld%c of file at %lld (0x%llx)" "- %llu percent complete. \r"), (long long)chksize, human_abbrev[cs], (unsigned long long)fsize, human_abbrev[i], (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)percent); fflush(stdout); } /* Put out a warm, fuzzy message every second so the user */ /* doesn't think we hung. (This may take a long time). */ void warm_fuzzy_stuff(uint64_t block) { static uint64_t one_percent = 0; static struct timeval tv; static uint32_t seconds = 0; if (!one_percent) one_percent = last_fs_block / 100; if (!last_reported_block || block - last_reported_block >= one_percent) { last_reported_block = block; gettimeofday(&tv, NULL); if (!seconds) seconds = tv.tv_sec; if (tv.tv_sec - seconds) { static uint64_t percent; seconds = tv.tv_sec; if (last_fs_block) { percent = (block * 100) / last_fs_block; log_notice( _("\r%llu percent complete.\r"), (unsigned long long)percent); fflush(stdout); } } } } char gfs2_getch(void) { struct termios termattr, savetermattr; char ch; ssize_t size; tcgetattr (STDIN_FILENO, &termattr); savetermattr = termattr; termattr.c_lflag &= ~(ICANON | IEXTEN | ISIG); termattr.c_iflag &= ~(BRKINT | ICRNL | INPCK | ISTRIP | IXON); termattr.c_cflag &= ~(CSIZE | PARENB); termattr.c_cflag |= CS8; termattr.c_oflag &= ~(OPOST); termattr.c_cc[VMIN] = 0; termattr.c_cc[VTIME] = 0; tcsetattr (STDIN_FILENO, TCSANOW, &termattr); do { size = 
read(STDIN_FILENO, &ch, 1); if (size) break; usleep(50000); } while (!size); tcsetattr (STDIN_FILENO, TCSANOW, &savetermattr); return ch; } char generic_interrupt(const char *caller, const char *where, const char *progress, const char *question, const char *answers) { fd_set rfds; struct timeval tv; char response; int err, i; FD_ZERO(&rfds); FD_SET(STDIN_FILENO, &rfds); tv.tv_sec = 0; tv.tv_usec = 0; /* Make sure there isn't extraneous input before asking the * user the question */ while((err = select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv))) { if(err < 0) { log_debug("Error in select() on stdin\n"); break; } if(read(STDIN_FILENO, &response, sizeof(char)) < 0) { log_debug("Error in read() on stdin\n"); break; } } while (TRUE) { printf("\n%s interrupted during %s: ", caller, where); if (progress) printf("%s.\n", progress); printf("%s", question); /* Make sure query is printed out */ fflush(NULL); response = gfs2_getch(); printf("\n"); fflush(NULL); if (strchr(answers, response)) break; printf("Bad response, please type "); for (i = 0; i < strlen(answers) - 1; i++) printf("'%c', ", answers[i]); printf(" or '%c'.\n", answers[i]); } return response; } /* fsck_query: Same as gfs2_query except it adjusts errors_found and errors_corrected. */ int fsck_query(const char *format, ...) 
{ va_list args; char response; int ret = 0; errors_found++; fsck_abort = 0; if (opts.yes) { errors_corrected++; return 1; } if (opts.no) return 0; opts.query = TRUE; while (1) { va_start(args, format); vprintf(format, args); va_end(args); /* Make sure query is printed out */ fflush(NULL); response = gfs2_getch(); printf("\n"); fflush(NULL); if (response == 0x3) { /* if interrupted, by ctrl-c */ response = generic_interrupt("Question", "response", NULL, "Do you want to abort " \ "or continue (a/c)?", "ac"); if (response == 'a') { ret = 0; fsck_abort = 1; break; } printf("Continuing.\n"); } else if (tolower(response) == 'y') { errors_corrected++; ret = 1; break; } else if (tolower(response) == 'n') { ret = 0; break; } else { printf("Bad response %d, please type 'y' or 'n'.\n", response); } } opts.query = FALSE; return ret; } /* * gfs2_dup_set - Flag a block as a duplicate * We keep the references in a red/black tree. We can't keep track of every * single inode in the file system, so the first time this function is called * will actually be for the second reference to the duplicated block. * This will return the number of references to the block. * * create - will be set if the call is supposed to create the reference. */ static struct duptree *gfs2_dup_set(uint64_t dblock, int create) { struct osi_node **newn = &dup_blocks.osi_node, *parent = NULL; struct duptree *dt; /* Figure out where to put new node */ while (*newn) { struct duptree *cur = (struct duptree *)*newn; parent = *newn; if (dblock < cur->block) newn = &((*newn)->osi_left); else if (dblock > cur->block) newn = &((*newn)->osi_right); else return cur; } if (!create) return NULL; dt = malloc(sizeof(struct duptree)); if (dt == NULL) { log_crit( _("Unable to allocate duptree structure\n")); return NULL; } dups_found++; memset(dt, 0, sizeof(struct duptree)); /* Add new node and rebalance tree. */ dt->block = dblock; dt->refs = 1; /* reference 1 is actually the reference we need to discover in pass1b. 
*/ dt->first_ref_found = 0; osi_list_init(&dt->ref_inode_list); osi_list_init(&dt->ref_invinode_list); osi_link_node(&dt->node, parent, newn); osi_insert_color(&dt->node, &dup_blocks); return dt; } /** * find_dup_ref_inode - find a duplicate reference inode entry for an inode */ struct inode_with_dups *find_dup_ref_inode(struct duptree *dt, struct gfs2_inode *ip) { osi_list_t *ref; struct inode_with_dups *id; osi_list_foreach(ref, &dt->ref_invinode_list) { id = osi_list_entry(ref, struct inode_with_dups, list); if (id->block_no == ip->i_di.di_num.no_addr) return id; } osi_list_foreach(ref, &dt->ref_inode_list) { id = osi_list_entry(ref, struct inode_with_dups, list); if (id->block_no == ip->i_di.di_num.no_addr) return id; } return NULL; } /* * add_duplicate_ref - Add a duplicate reference to the duplicates tree list * A new element of the tree will be created as needed * When the first reference is discovered in pass1, it realizes it's a * duplicate but it has already forgotten where the first reference was. * So we need to recreate the duplicate reference structure if it's not there. * Later, in pass1b, it has to go back through the file system * and figure out those original references in order to resolve them. * * first - if 1, we're being called from pass1b, in which case we're trying * to find the first reference to this block. If 0, we're being * called from pass1, which is the second reference, which determined * it was a duplicate.. */ int add_duplicate_ref(struct gfs2_inode *ip, uint64_t block, enum dup_ref_type reftype, int first, int inode_valid) { struct inode_with_dups *id; struct duptree *dt; if (!valid_block(ip->i_sbd, block)) return meta_is_good; /* If this is not the first reference (i.e. all calls from pass1) we need to create the duplicate reference. If this is pass1b, we want to ignore references that aren't found. 
*/ dt = gfs2_dup_set(block, !first); if (!dt) /* If this isn't a duplicate */ return meta_is_good; /* If we found the duplicate reference but we've already discovered the first reference (in pass1b) and the other references in pass1, we don't need to count it, so just return. */ if (dt->first_ref_found) return meta_is_good; /* Check for a previous reference to this duplicate */ id = find_dup_ref_inode(dt, ip); /* We have to be careful here. The original referencing dinode may have deemed to be bad and deleted/freed in pass1. In that case, pass1b wouldn't discover the correct [deleted] original reference. In that case, we don't want to be confused and consider this second reference the same as the first. If we do, we'll never be able to resolve it. The first reference can't be the second reference. */ if (id && first && !dt->first_ref_found) { log_info(_("Original reference to block %llu (0x%llx) was " "previously found to be bad and deleted.\n"), (unsigned long long)block, (unsigned long long)block); log_info(_("I'll consider the reference from inode %llu " "(0x%llx) the first reference.\n"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); dt->first_ref_found = 1; return meta_is_good; } /* The first time this is called from pass1 is actually the second reference. When we go back in pass1b looking for the original reference, we don't want to increment the reference count because it's already accounted for. */ if (first) { dt->first_ref_found = 1; dups_found_first++; /* We found another first ref. */ } else { dt->refs++; } if (id == NULL) { /* Check for the inode on the invalid inode reference list. 
*/ uint8_t q; if (!(id = malloc(sizeof(*id)))) { log_crit( _("Unable to allocate " "inode_with_dups structure\n")); return meta_error; } if (!(memset(id, 0, sizeof(*id)))) { log_crit( _("Unable to zero inode_with_dups " "structure\n")); return meta_error; } id->block_no = ip->i_di.di_num.no_addr; q = block_type(ip->i_di.di_num.no_addr); /* If it's an invalid dinode, put it first on the invalid inode reference list otherwise put it on the normal list. */ if (!inode_valid || q == gfs2_inode_invalid) osi_list_add_prev(&id->list, &dt->ref_invinode_list); else { /* If this is a system dinode, we want the duplicate processing to find it first. That way references from inside journals, et al, will take priority. We don't want to delete journals in favor of dinodes that reference a block inside a journal. */ if (fsck_system_inode(ip->i_sbd, id->block_no)) osi_list_add(&id->list, &dt->ref_inode_list); else osi_list_add_prev(&id->list, &dt->ref_inode_list); } } id->reftypecount[reftype]++; id->dup_count++; log_info( _("Found %d reference(s) to block %llu" " (0x%llx) as %s in %s inode #%llu (0x%llx)\n"), id->dup_count, (unsigned long long)block, (unsigned long long)block, reftypes[reftype], inode_valid ? 
_("valid") : _("invalid"), (unsigned long long)ip->i_di.di_num.no_addr, (unsigned long long)ip->i_di.di_num.no_addr); if (first) log_info( _("This is the original reference.\n")); else log_info( _("This brings the total to: %d duplicate " "references\n"), dt->refs); return meta_is_good; } struct dir_info *dirtree_insert(struct gfs2_inum inum) { struct osi_node **newn = &dirtree.osi_node, *parent = NULL; struct dir_info *data; /* Figure out where to put new node */ while (*newn) { struct dir_info *cur = (struct dir_info *)*newn; parent = *newn; if (inum.no_addr < cur->dinode.no_addr) newn = &((*newn)->osi_left); else if (inum.no_addr > cur->dinode.no_addr) newn = &((*newn)->osi_right); else return cur; } data = malloc(sizeof(struct dir_info)); if (!data) { log_crit( _("Unable to allocate dir_info structure\n")); return NULL; } if (!memset(data, 0, sizeof(struct dir_info))) { log_crit( _("Error while zeroing dir_info structure\n")); return NULL; } /* Add new node and rebalance tree. */ data->dinode.no_addr = inum.no_addr; data->dinode.no_formal_ino = inum.no_formal_ino; osi_link_node(&data->node, parent, newn); osi_insert_color(&data->node, &dirtree); return data; } struct dir_info *dirtree_find(uint64_t block) { struct osi_node *node = dirtree.osi_node; while (node) { struct dir_info *data = (struct dir_info *)node; if (block < data->dinode.no_addr) node = node->osi_left; else if (block > data->dinode.no_addr) node = node->osi_right; else return data; } return NULL; } /* get_ref_type - figure out if all duplicate references from this inode are the same type, and if so, return the type. 
*/ enum dup_ref_type get_ref_type(struct inode_with_dups *id) { enum dup_ref_type t, i; int found_type_with_ref; int found_other_types; for (t = ref_as_data; t < ref_types; t++) { found_type_with_ref = 0; found_other_types = 0; for (i = ref_as_data; i < ref_types; i++) { if (id->reftypecount[i]) { if (t == i) found_type_with_ref = 1; else found_other_types = 1; } } if (found_type_with_ref) return found_other_types ? ref_types : t; } return ref_types; } void dup_listent_delete(struct duptree *dt, struct inode_with_dups *id) { log_err( _("Removing duplicate reference to block %llu (0x%llx) " "referenced as %s by dinode %llu (0x%llx)\n"), (unsigned long long)dt->block, (unsigned long long)dt->block, reftypes[get_ref_type(id)], (unsigned long long)id->block_no, (unsigned long long)id->block_no); dt->refs--; /* one less reference */ if (id->name) free(id->name); osi_list_del(&id->list); free(id); } void dup_delete(struct duptree *dt) { struct inode_with_dups *id; osi_list_t *tmp; while (!osi_list_empty(&dt->ref_invinode_list)) { tmp = (&dt->ref_invinode_list)->next; id = osi_list_entry(tmp, struct inode_with_dups, list); dup_listent_delete(dt, id); } while (!osi_list_empty(&dt->ref_inode_list)) { tmp = (&dt->ref_inode_list)->next; id = osi_list_entry(tmp, struct inode_with_dups, list); dup_listent_delete(dt, id); } osi_erase(&dt->node, &dup_blocks); free(dt); } void dirtree_delete(struct dir_info *b) { osi_erase(&b->node, &dirtree); free(b); } static int gfs2_blockmap_create(struct gfs2_bmap *bmap, uint64_t size) { bmap->size = size; /* Have to add 1 to BLOCKMAP_SIZE since it's 0-based and mallocs * must be 1-based */ bmap->mapsize = BLOCKMAP_SIZE4(size); if (!(bmap->map = malloc(sizeof(char) * bmap->mapsize))) return -ENOMEM; if (!memset(bmap->map, 0, sizeof(char) * bmap->mapsize)) { free(bmap->map); bmap->map = NULL; return -ENOMEM; } return 0; } static void gfs2_blockmap_destroy(struct gfs2_bmap *bmap) { if (bmap->map) free(bmap->map); bmap->size = 0; bmap->mapsize = 
0; } struct gfs2_bmap *gfs2_bmap_create(struct gfs2_sbd *sdp, uint64_t size, uint64_t *addl_mem_needed) { struct gfs2_bmap *il; *addl_mem_needed = 0L; il = malloc(sizeof(*il)); if (!il || !memset(il, 0, sizeof(*il))) return NULL; if (gfs2_blockmap_create(il, size)) { *addl_mem_needed = il->mapsize; free(il); il = NULL; } osi_list_init(&sdp->eattr_blocks.list); return il; } int gfs2_blockmap_set(struct gfs2_bmap *bmap, uint64_t bblock, enum gfs2_mark_block mark) { static unsigned char *byte; static uint64_t b; if (bblock > bmap->size) return -1; byte = bmap->map + BLOCKMAP_SIZE4(bblock); b = BLOCKMAP_BYTE_OFFSET4(bblock); *byte &= ~(BLOCKMAP_MASK4 << b); *byte |= (mark & BLOCKMAP_MASK4) << b; return 0; } void *gfs2_bmap_destroy(struct gfs2_sbd *sdp, struct gfs2_bmap *il) { if (il) { gfs2_blockmap_destroy(il); free(il); il = NULL; } gfs2_special_free(&sdp->eattr_blocks); return il; } /* set_ip_blockmap - set the blockmap for a dinode * * instree: Set to 1 if directories should be inserted into the directory tree * otherwise 0. 
* returns: 0 if no error, -EINVAL if dinode has a bad mode, -EPERM on error */ int set_ip_blockmap(struct gfs2_inode *ip, int instree) { uint64_t block = ip->i_bh->b_blocknr; struct gfs2_sbd *sdp = ip->i_sbd; uint32_t mode; if (sdp->gfs1) mode = gfs_to_gfs2_mode(ip); else mode = ip->i_di.di_mode & S_IFMT; switch (mode) { case S_IFDIR: if (fsck_blockmap_set(ip, block, _("directory"), gfs2_inode_dir)) goto bad_dinode; if (instree && !dirtree_insert(ip->i_di.di_num)) goto bad_dinode; break; case S_IFREG: if (fsck_blockmap_set(ip, block, _("file"), gfs2_inode_file)) goto bad_dinode; break; case S_IFLNK: if (fsck_blockmap_set(ip, block, _("symlink"), gfs2_inode_lnk)) goto bad_dinode; break; case S_IFBLK: if (fsck_blockmap_set(ip, block, _("block device"), gfs2_inode_device)) goto bad_dinode; break; case S_IFCHR: if (fsck_blockmap_set(ip, block, _("character device"), gfs2_inode_device)) goto bad_dinode; break; case S_IFIFO: if (fsck_blockmap_set(ip, block, _("fifo"), gfs2_inode_fifo)) goto bad_dinode; break; case S_IFSOCK: if (fsck_blockmap_set(ip, block, _("socket"), gfs2_inode_sock)) goto bad_dinode; break; default: fsck_blockmap_set(ip, block, _("invalid mode"), gfs2_inode_invalid); return -EINVAL; } return 0; bad_dinode: stack; return -EPERM; } uint64_t find_free_blk(struct gfs2_sbd *sdp) { struct osi_node *n, *next = NULL; struct rgrp_tree *rl = NULL; struct gfs2_rindex *ri; struct gfs2_rgrp *rg; unsigned int block, bn = 0, x = 0, y = 0; unsigned int state; struct gfs2_buffer_head *bh; memset(&rg, 0, sizeof(rg)); for (n = osi_first(&sdp->rgtree); n; n = next) { next = osi_next(n); rl = (struct rgrp_tree *)n; if (rl->rg.rg_free) break; } if (n == NULL) return 0; ri = &rl->ri; rg = &rl->rg; for (block = 0; block < ri->ri_length; block++) { bh = rl->bh[block]; x = (block) ? 
sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_rgrp); for (; x < sdp->bsize; x++) for (y = 0; y < GFS2_NBBY; y++) { state = (bh->b_data[x] >> (GFS2_BIT_SIZE * y)) & 0x03; if (state == GFS2_BLKST_FREE) return ri->ri_data0 + bn; bn++; } } return 0; } uint64_t *get_dir_hash(struct gfs2_inode *ip) { unsigned hsize = (1 << ip->i_di.di_depth) * sizeof(uint64_t); int ret; uint64_t *tbl = malloc(hsize); if (tbl == NULL) return NULL; ret = gfs2_readi(ip, tbl, 0, hsize); if (ret != hsize) { free(tbl); return NULL; } return tbl; } void delete_all_dups(struct gfs2_inode *ip) { struct osi_node *n, *next; struct duptree *dt; osi_list_t *tmp, *x; struct inode_with_dups *id; int found; for (n = osi_first(&dup_blocks); n; n = next) { next = osi_next(n); dt = (struct duptree *)n; found = 0; id = NULL; osi_list_foreach_safe(tmp, &dt->ref_invinode_list, x) { id = osi_list_entry(tmp, struct inode_with_dups, list); if (id->block_no == ip->i_di.di_num.no_addr) { dup_listent_delete(dt, id); found = 1; } } osi_list_foreach_safe(tmp, &dt->ref_inode_list, x) { id = osi_list_entry(tmp, struct inode_with_dups, list); if (id->block_no == ip->i_di.di_num.no_addr) { dup_listent_delete(dt, id); found = 1; } } if (!found) continue; if (dt->refs == 0) { log_debug(_("This was the last reference: 0x%llx is " "no longer a duplicate.\n"), (unsigned long long)dt->block); dup_delete(dt); /* not duplicate now */ } else { log_debug(_("%d references remain to 0x%llx\n"), dt->refs, (unsigned long long)dt->block); if (dt->refs > 1) continue; id = NULL; osi_list_foreach(tmp, &dt->ref_invinode_list) id = osi_list_entry(tmp, struct inode_with_dups, list); osi_list_foreach(tmp, &dt->ref_inode_list) id = osi_list_entry(tmp, struct inode_with_dups, list); if (id) log_debug("Last reference is from inode " "0x%llx\n", (unsigned long long)id->block_no); } } } gfs2-utils/gfs2/fsck/util.h0000664000175000017500000001252212154127655014456 0ustar andyandy#ifndef __UTIL_H__ #define __UTIL_H__ #include #include "fsck.h" 
#include "libgfs2.h" #define fsck_lseek(fd, off) \ ((lseek((fd), (off), SEEK_SET) == (off)) ? 0 : -1) #define INODE_VALID 1 #define INODE_INVALID 0 struct di_info *search_list(osi_list_t *list, uint64_t addr); void big_file_comfort(struct gfs2_inode *ip, uint64_t blks_checked); void warm_fuzzy_stuff(uint64_t block); int add_duplicate_ref(struct gfs2_inode *ip, uint64_t block, enum dup_ref_type reftype, int first, int inode_valid); extern struct inode_with_dups *find_dup_ref_inode(struct duptree *dt, struct gfs2_inode *ip); extern void dup_listent_delete(struct duptree *dt, struct inode_with_dups *id); extern const char *reftypes[ref_types + 1]; static inline uint8_t block_type(uint64_t bblock) { static unsigned char *byte; static uint64_t b; static uint8_t btype; byte = bl->map + BLOCKMAP_SIZE4(bblock); b = BLOCKMAP_BYTE_OFFSET4(bblock); btype = (*byte & (BLOCKMAP_MASK4 << b )) >> b; return btype; } /* blockmap declarations and functions */ enum gfs2_mark_block { gfs2_block_free = (0x0), gfs2_block_used = (0x1), gfs2_indir_blk = (0x2), /* These are inode block types (only): */ gfs2_inode_dir = (0x3), gfs2_inode_file = (0x4), gfs2_inode_lnk = (0x5), gfs2_inode_device = (0x6), /* char or block device */ gfs2_inode_fifo = (0x7), gfs2_inode_sock = (0x8), gfs2_inode_invalid = (0x9), /* misc block types: */ gfs2_jdata = (0xa), /* gfs journaled data blocks */ gfs2_meta_inval = (0xb), gfs2_leaf_blk = (0xc), gfs2_freemeta = (0xd), /* was: gfs2_meta_rgrp */ gfs2_meta_eattr = (0xe), gfs2_bad_block = (0xf), /* Contains at least one bad block */ }; static const inline char *block_type_string(uint8_t q) { const char *blktyp[] = { "free", "data", "indirect meta", "directory", "file", "symlink", "device", "fifo", "socket", "invalid inode", "journaled data", "invalid meta", "dir leaf", "free metadata", "eattribute", "bad"}; if (q < 16) return (blktyp[q]); return blktyp[15]; } /* Must be kept in sync with gfs2_mark_block enum above. 
Blocks marked as invalid or bad are considered metadata until actually freed. */ static inline int blockmap_to_bitmap(enum gfs2_mark_block m, int gfs1) { static int bitmap_states[2][16] = { /* ---------------------- gfs2 ------------------------------*/ {GFS2_BLKST_FREE, /* free */ GFS2_BLKST_USED, /* data */ GFS2_BLKST_USED, /* indirect data or rgrp meta */ GFS2_BLKST_DINODE, /* directory */ GFS2_BLKST_DINODE, /* file */ GFS2_BLKST_DINODE, /* symlink */ GFS2_BLKST_DINODE, /* block or char device */ GFS2_BLKST_DINODE, /* fifo */ GFS2_BLKST_DINODE, /* socket */ GFS2_BLKST_FREE, /* invalid inode */ GFS2_BLKST_USED, /* journaled data */ GFS2_BLKST_FREE, /* invalid meta */ GFS2_BLKST_USED, /* dir leaf */ GFS2_BLKST_UNLINKED, /* GFS unlinked metadata */ GFS2_BLKST_USED, /* eattribute */ GFS2_BLKST_DINODE}, /* bad */ /* ---------------------- gfs1 ----------------------------- */ {GFS2_BLKST_FREE, /* free */ GFS2_BLKST_USED, /* data */ GFS2_BLKST_DINODE, /* indirect data or rgrp meta*/ GFS2_BLKST_DINODE, /* directory */ GFS2_BLKST_DINODE, /* file */ GFS2_BLKST_DINODE, /* symlink */ GFS2_BLKST_DINODE, /* block or char device */ GFS2_BLKST_DINODE, /* fifo */ GFS2_BLKST_DINODE, /* socket */ GFS2_BLKST_FREE, /* invalid inode */ GFS2_BLKST_DINODE, /* journaled data */ GFS2_BLKST_FREE, /* invalid meta */ GFS2_BLKST_DINODE, /* dir leaf */ GFS2_BLKST_UNLINKED, /* GFS unlinked metadata */ GFS2_BLKST_DINODE, /* eattribute */ GFS2_BLKST_DINODE}}; /* bad */ return bitmap_states[gfs1][m]; } static inline int is_dir(struct gfs2_dinode *dinode, int gfs1) { if (gfs1 && is_gfs_dir(dinode)) return 1; if (S_ISDIR(dinode->di_mode)) return 1; return 0; } static inline uint32_t gfs_to_gfs2_mode(struct gfs2_inode *ip) { uint16_t gfs1mode = ip->i_di.__pad1; switch (gfs1mode) { case GFS_FILE_DIR: return S_IFDIR; case GFS_FILE_REG: return S_IFREG; case GFS_FILE_LNK: return S_IFLNK; case GFS_FILE_BLK: return S_IFBLK; case GFS_FILE_CHR: return S_IFCHR; case GFS_FILE_FIFO: return S_IFIFO; case 
GFS_FILE_SOCK: return S_IFSOCK; default: /* This could be an aborted gfs2_convert so look for both. */ if (ip->i_di.di_entries || (ip->i_di.di_mode & S_IFMT) == S_IFDIR) return S_IFDIR; else return S_IFREG; } } extern enum dup_ref_type get_ref_type(struct inode_with_dups *id); extern struct gfs2_bmap *gfs2_bmap_create(struct gfs2_sbd *sdp, uint64_t size, uint64_t *addl_mem_needed); extern void *gfs2_bmap_destroy(struct gfs2_sbd *sdp, struct gfs2_bmap *il); extern int gfs2_blockmap_set(struct gfs2_bmap *il, uint64_t block, enum gfs2_mark_block mark); extern int set_ip_blockmap(struct gfs2_inode *ip, int instree); extern char generic_interrupt(const char *caller, const char *where, const char *progress, const char *question, const char *answers); extern char gfs2_getch(void); extern uint64_t find_free_blk(struct gfs2_sbd *sdp); extern uint64_t *get_dir_hash(struct gfs2_inode *ip); extern void delete_all_dups(struct gfs2_inode *ip); #define stack log_debug(" - %s()\n", __func__) #endif /* __UTIL_H__ */ gfs2-utils/gfs2/include/Makefile.am0000664000175000017500000000013312154127655016054 0ustar andyandyMAINTAINERCLEANFILES = Makefile.in noinst_HEADERS = osi_list.h osi_tree.h linux_endian.h gfs2-utils/gfs2/include/linux_endian.h0000664000175000017500000000266312154127655016660 0ustar andyandy#ifndef __LINUX_ENDIAN_DOT_H__ #define __LINUX_ENDIAN_DOT_H__ #include #include /* I'm not sure which versions of alpha glibc/gcc are broken, so fix all of them. 
*/ #ifdef __alpha__ #undef bswap_64 static __inline__ unsigned long bswap_64(unsigned long x) { unsigned int h = x >> 32; unsigned int l = x; h = bswap_32(h); l = bswap_32(l); return ((unsigned long)l << 32) | h; } #endif /* __alpha__ */ #if __BYTE_ORDER == __BIG_ENDIAN #define be16_to_cpu(x) (x) #define be32_to_cpu(x) (x) #define be64_to_cpu(x) (x) #define cpu_to_be16(x) (x) #define cpu_to_be32(x) (x) #define cpu_to_be64(x) (x) #define le16_to_cpu(x) (bswap_16((x))) #define le32_to_cpu(x) (bswap_32((x))) #define le64_to_cpu(x) (bswap_64((x))) #define cpu_to_le16(x) (bswap_16((x))) #define cpu_to_le32(x) (bswap_32((x))) #define cpu_to_le64(x) (bswap_64((x))) #endif /* __BYTE_ORDER == __BIG_ENDIAN */ #if __BYTE_ORDER == __LITTLE_ENDIAN #define be16_to_cpu(x) (bswap_16((x))) #define be32_to_cpu(x) (bswap_32((x))) #define be64_to_cpu(x) (bswap_64((x))) #define cpu_to_be16(x) (bswap_16((x))) #define cpu_to_be32(x) (bswap_32((x))) #define cpu_to_be64(x) (bswap_64((x))) #define le16_to_cpu(x) (x) #define le32_to_cpu(x) (x) #define le64_to_cpu(x) (x) #define cpu_to_le16(x) (x) #define cpu_to_le32(x) (x) #define cpu_to_le64(x) (x) #endif /* __BYTE_ORDER == __LITTLE_ENDIAN */ #endif /* __LINUX_ENDIAN_DOT_H__ */ gfs2-utils/gfs2/include/osi_list.h0000664000175000017500000000374612110647577016035 0ustar andyandy#ifndef __OSI_LIST_DOT_H__ #define __OSI_LIST_DOT_H__ struct osi_list { struct osi_list *next, *prev; }; typedef struct osi_list osi_list_t; #define osi_list_decl(var) osi_list_t var = { &var, &var } #define osi_list_empty(var) ((var)->next == (var)) #define osi_list_entry(var, type, mem) ((type *)((unsigned long)(var) - (unsigned long)(&((type *)NULL)->mem))) #define osi_list_init(head) \ do \ { \ osi_list_t *osi_list_var = (head); \ osi_list_var->next = osi_list_var->prev = osi_list_var; \ } \ while (0) #define osi_list_add(new, head) \ do \ { \ osi_list_t *osi_list_var_new = (new); \ osi_list_t *osi_list_var_head = (head); \ osi_list_var_new->next = 
osi_list_var_head->next; \ osi_list_var_new->prev = osi_list_var_head; \ osi_list_var_head->next->prev = osi_list_var_new; \ osi_list_var_head->next = osi_list_var_new; \ } \ while (0) #define osi_list_add_next osi_list_add #define osi_list_add_prev(new, head) \ do \ { \ osi_list_t *osi_list_var_new = (new); \ osi_list_t *osi_list_var_head = (head); \ osi_list_var_new->prev = osi_list_var_head->prev; \ osi_list_var_new->next = osi_list_var_head; \ osi_list_var_head->prev->next = osi_list_var_new; \ osi_list_var_head->prev = osi_list_var_new; \ } \ while (0) #define osi_list_del(var) \ do \ { \ osi_list_t *osi_list_var = (var); \ osi_list_var->next->prev = osi_list_var->prev; \ osi_list_var->prev->next = osi_list_var->next; \ } \ while (0) #define osi_list_del_init(var) \ do \ { \ osi_list_t *osi_list_var = (var); \ osi_list_var->next->prev = osi_list_var->prev; \ osi_list_var->prev->next = osi_list_var->next; \ osi_list_var->next = osi_list_var->prev = osi_list_var; \ } \ while (0) #define osi_list_foreach(tmp, head) \ for ((tmp) = (head)->next; (tmp) != (head); (tmp) = (tmp)->next) #define osi_list_foreach_safe(tmp, head, x) \ for ((tmp) = (head)->next, (x) = (tmp)->next; \ (tmp) != (head); \ (tmp) = (x), (x) = (x)->next) #endif /* __OSI_LIST_DOT_H__ */ gfs2-utils/gfs2/include/osi_tree.h0000664000175000017500000002206512110647577016014 0ustar andyandy#ifndef __OSI_RBTREE_DOT_H__ #define __OSI_RBTREE_DOT_H__ #include #include #include /* Adapted from the kernel's rbtree.c */ struct osi_node { unsigned long osi_parent_color; #define OSI_RED 0 #define OSI_BLACK 1 struct osi_node *osi_left; struct osi_node *osi_right; struct osi_node *osi_parent; }; #define osi_parent(r) ((struct osi_node *)((r)->osi_parent_color & ~3)) #define osi_color(r) ((r)->osi_parent_color & 1) #define osi_is_red(r) (!osi_color(r)) #define osi_is_black(r) osi_color(r) #define osi_set_red(r) do { (r)->osi_parent_color &= ~1; } while (0) #define osi_set_black(r) do { (r)->osi_parent_color |= 1; } 
while (0) struct osi_root { struct osi_node *osi_node; }; static inline void osi_set_color(struct osi_node *rb, int color) { rb->osi_parent_color = (rb->osi_parent_color & ~1) | color; } static inline void osi_set_parent(struct osi_node *rb, struct osi_node *p) { rb->osi_parent_color = (rb->osi_parent_color & 3) | (unsigned long)p; } static inline void osi_link_node(struct osi_node *node, struct osi_node *parent, struct osi_node **osi_link) { node->osi_parent_color = (unsigned long )parent; node->osi_left = node->osi_right = NULL; *osi_link = node; } static inline void __osi_rotate_left(struct osi_node *node, struct osi_root *root) { struct osi_node *right = node->osi_right; struct osi_node *parent = osi_parent(node); if ((node->osi_right = right->osi_left)) osi_set_parent(right->osi_left, node); right->osi_left = node; osi_set_parent(right, parent); if (parent) { if (node == parent->osi_left) parent->osi_left = right; else parent->osi_right = right; } else root->osi_node = right; osi_set_parent(node, right); } static inline void __osi_rotate_right(struct osi_node *node, struct osi_root *root) { struct osi_node *left = node->osi_left; struct osi_node *parent = osi_parent(node); if ((node->osi_left = left->osi_right)) osi_set_parent(left->osi_right, node); left->osi_right = node; osi_set_parent(left, parent); if (parent) { if (node == parent->osi_right) parent->osi_right = left; else parent->osi_left = left; } else root->osi_node = left; osi_set_parent(node, left); } static inline void osi_insert_color(struct osi_node *node, struct osi_root *root) { struct osi_node *parent, *gparent; while ((parent = osi_parent(node)) && osi_is_red(parent)) { gparent = osi_parent(parent); if (parent == gparent->osi_left) { { register struct osi_node *uncle = gparent->osi_right; if (uncle && osi_is_red(uncle)) { osi_set_black(uncle); osi_set_black(parent); osi_set_red(gparent); node = gparent; continue; } } if (parent->osi_right == node) { register struct osi_node *tmp; 
__osi_rotate_left(parent, root); tmp = parent; parent = node; node = tmp; } osi_set_black(parent); osi_set_red(gparent); __osi_rotate_right(gparent, root); } else { { register struct osi_node *uncle = gparent->osi_left; if (uncle && osi_is_red(uncle)) { osi_set_black(uncle); osi_set_black(parent); osi_set_red(gparent); node = gparent; continue; } } if (parent->osi_left == node) { register struct osi_node *tmp; __osi_rotate_right(parent, root); tmp = parent; parent = node; node = tmp; } osi_set_black(parent); osi_set_red(gparent); __osi_rotate_left(gparent, root); } } osi_set_black(root->osi_node); } static inline void __osi_erase_color(struct osi_node *node, struct osi_node *parent, struct osi_root *root) { struct osi_node *other; while ((!node || osi_is_black(node)) && node != root->osi_node) { if (parent->osi_left == node) { other = parent->osi_right; if (osi_is_red(other)) { osi_set_black(other); osi_set_red(parent); __osi_rotate_left(parent, root); other = parent->osi_right; } if ((!other->osi_left || osi_is_black(other->osi_left)) && (!other->osi_right || osi_is_black(other->osi_right))) { osi_set_red(other); node = parent; parent = osi_parent(node); } else { if (!other->osi_right || osi_is_black(other->osi_right)) { struct osi_node *o_left; if ((o_left = other->osi_left)) osi_set_black(o_left); osi_set_red(other); __osi_rotate_right(other, root); other = parent->osi_right; } osi_set_color(other, osi_color(parent)); osi_set_black(parent); if (other->osi_right) osi_set_black(other->osi_right); __osi_rotate_left(parent, root); node = root->osi_node; break; } } else { other = parent->osi_left; if (osi_is_red(other)) { osi_set_black(other); osi_set_red(parent); __osi_rotate_right(parent, root); other = parent->osi_left; } if ((!other->osi_left || osi_is_black(other->osi_left)) && (!other->osi_right || osi_is_black(other->osi_right))) { osi_set_red(other); node = parent; parent = osi_parent(node); } else { if (!other->osi_left || osi_is_black(other->osi_left)) { 
register struct osi_node *o_right; if ((o_right = other->osi_right)) osi_set_black(o_right); osi_set_red(other); __osi_rotate_left(other, root); other = parent->osi_left; } osi_set_color(other, osi_color(parent)); osi_set_black(parent); if (other->osi_left) osi_set_black(other->osi_left); __osi_rotate_right(parent, root); node = root->osi_node; break; } } } if (node) osi_set_black(node); } static inline void osi_erase(struct osi_node *node, struct osi_root *root) { struct osi_node *child, *parent; int color; if (!node->osi_left) child = node->osi_right; else if (!node->osi_right) child = node->osi_left; else { struct osi_node *old = node, *left; node = node->osi_right; while ((left = node->osi_left) != NULL) node = left; child = node->osi_right; parent = osi_parent(node); color = osi_color(node); if (child) osi_set_parent(child, parent); if (parent == old) { parent->osi_right = child; parent = node; } else parent->osi_left = child; node->osi_parent_color = old->osi_parent_color; node->osi_right = old->osi_right; node->osi_left = old->osi_left; if (osi_parent(old)) { if (osi_parent(old)->osi_left == old) osi_parent(old)->osi_left = node; else osi_parent(old)->osi_right = node; } else root->osi_node = node; osi_set_parent(old->osi_left, node); if (old->osi_right) osi_set_parent(old->osi_right, node); goto color; } parent = osi_parent(node); color = osi_color(node); if (child) osi_set_parent(child, parent); if (parent) { if (parent->osi_left == node) parent->osi_left = child; else parent->osi_right = child; } else root->osi_node = child; color: if (color == OSI_BLACK) __osi_erase_color(child, parent, root); } /* * This function returns the first node (in sort order) of the tree. 
*/ static inline struct osi_node *osi_first(struct osi_root *root) { struct osi_node *n; n = root->osi_node; if (!n) return NULL; while (n->osi_left) n = n->osi_left; return n; } static inline struct osi_node *osi_last(struct osi_root *root) { struct osi_node *n; n = root->osi_node; if (!n) return NULL; while (n->osi_right) n = n->osi_right; return n; } static inline struct osi_node *osi_next(struct osi_node *node) { struct osi_node *parent; /* If we have a right-hand child, go down and then left as far as we can. */ if (node->osi_right) { node = node->osi_right; while (node->osi_left) node=node->osi_left; return node; } /* No right-hand children. Everything down and left is smaller than us, so any 'next' node must be in the general direction of our parent. Go up the tree; any time the ancestor is a right-hand child of its parent, keep going up. First time it's a left-hand child of its parent, said parent is our 'next' node. */ while ((parent = osi_parent(node)) && node == parent->osi_right) node = parent; return parent; } static inline struct osi_node *osi_prev(struct osi_node *node) { struct osi_node *parent; /* If we have a left-hand child, go down and then right as far as we can. */ if (node->osi_left) { node = node->osi_left; while (node->osi_right) node=node->osi_right; return node; } /* No left-hand children. 
Go up till we find an ancestor which is a right-hand child of its parent */ while ((parent = osi_parent(node)) && node == parent->osi_left) node = parent; return parent; } static inline void osi_replace_node(struct osi_node *victim, struct osi_node *new, struct osi_root *root) { struct osi_node *parent = osi_parent(victim); /* Set the surrounding nodes to point to the replacement */ if (parent) { if (victim == parent->osi_left) parent->osi_left = new; else parent->osi_right = new; } else { root->osi_node = new; } if (victim->osi_left) osi_set_parent(victim->osi_left, new); if (victim->osi_right) osi_set_parent(victim->osi_right, new); /* Copy the pointers/colour from the victim to the replacement */ *new = *victim; } #endif gfs2-utils/gfs2/init.d/gfs20000664000175000017500000000531712166503246014354 0ustar andyandy#!/bin/bash # # gfs2 mount/unmount helper # # chkconfig: - 26 74 # description: mount/unmount gfs2 filesystems configured in /etc/fstab ### BEGIN INIT INFO # Provides: gfs2 # Required-Start: $network cman gfs_controld # Required-Stop: $network cman gfs_controld # Default-Start: # Default-Stop: # Short-Description: mount/unmount gfs2 filesystems configured in /etc/fstab # Description: mount/unmount gfs2 filesystems configured in /etc/fstab ### END INIT INFO # set secure PATH PATH="/bin:/sbin:/usr/sbin:/usr/bin" ### generic wrapper functions success() { echo -ne "[ OK ]\r" } failure() { echo -ne "[FAILED]\r" } ok() { success echo } nok() { echo -e "$errmsg" failure echo exit 1 } # rpm based distros if [ -d /etc/sysconfig ]; then [ -f /etc/init.d/functions ] && . /etc/init.d/functions [ -f /etc/sysconfig/cluster ] && . /etc/sysconfig/cluster [ -f /etc/sysconfig/gfs2 ] && . /etc/sysconfig/gfs2 [ -z "$LOCK_FILE" ] && LOCK_FILE="/var/lock/subsys/gfs2" fi # deb based distros if [ -d /etc/default ]; then [ -f /etc/default/cluster ] && . /etc/default/cluster [ -f /etc/default/gfs2 ] && . 
/etc/default/gfs2 [ -z "$LOCK_FILE" ] && LOCK_FILE="/var/lock/gfs2" fi # proc is required for both status and stop. # start could live without, but better be consistent with the behavior if [ ! -f /proc/mounts ]; then echo "GFS2: /proc is not available, unable to proceed" exit 1 fi # # This script's behavior is modeled closely after the netfs script. # GFS2FSTAB=$(LC_ALL=C awk '!/^#/ && $3 == "gfs2" && $4 !~ /noauto/ { print $2 }' /etc/fstab) GFS2MTAB=$(LC_ALL=C awk '!/^#/ && $3 == "gfs2" && $2 != "/" { print $2 }' /proc/mounts | sort -r) if [ -z "$GFS2FSTAB" ]; then echo "GFS2: no entries found in /etc/fstab" exit 6 fi # See how we were called. case "$1" in start) [ -z "$GFS2FSTAB" ] && exit 0 echo -n "Mounting GFS2 filesystems: " errmsg="$(mount -a -t gfs2 2>&1)" || nok touch $LOCK_FILE ok ;; stop) [ -z "$GFS2MTAB" ] && exit 0 echo -n "Unmounting GFS2 filesystems: " errmsg="$(umount -a -t gfs2 2>&1)" || nok modprobe -r gfs2 > /dev/null 2>&1 || true rm -f $LOCK_FILE ok ;; status) if [ -z "$GFS2MTAB" ] && [ -f $LOCK_FILE ]; then echo "GFS2: Found stale lock file $LOCK_FILE" exit 2 fi if [ -n "$GFS2FSTAB" ] && [ -z "$GFS2MTAB" ]; then echo "GFS2: service is not running" exit 3 fi echo "Configured GFS2 mountpoints: " for fs in $GFS2FSTAB; do echo $fs; done echo "Active GFS2 mountpoints: " for fs in $GFS2MTAB; do echo $fs; done ;; condrestart|try-restart) $0 status >/dev/null 2>&1 || exit 0 $0 restart ;; restart|reload|force-reload) $0 stop $0 start ;; *) echo "Usage: $0 {start|stop|restart|reload|force-reload|condrestart|try-restart|status}" exit 2 ;; esac exit 0 gfs2-utils/gfs2/libgfs2/Makefile.am0000664000175000017500000000154612110647577015774 0ustar andyandyMAINTAINERCLEANFILES = Makefile.in CLEANFILES = parser.h parser.c lexer.c lexer.h BUILT_SOURCES = parser.h lexer.h AM_LFLAGS = --header-file=lexer.h AM_YFLAGS = -d noinst_HEADERS = libgfs2.h lang.h noinst_LTLIBRARIES = libgfs2.la noinst_PROGRAMS = gfs2l libgfs2_la_SOURCES = block_list.c fs_bits.c gfs1.c misc.c 
rgrp.c super.c \ buf.c fs_geometry.c gfs2_disk_hash.c ondisk.c \ device_geometry.c fs_ops.c gfs2_log.c recovery.c \ structures.c meta.c lang.c parser.y lexer.l libgfs2_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 \ -D_LARGEFILE64_SOURCE \ -D_GNU_SOURCE \ -I$(top_srcdir)/gfs2/include gfs2l_SOURCES = gfs2l.c gfs2l_CPPFLAGS = -I$(top_srcdir)/gfs2/include gfs2l_LDADD = libgfs2.la # Autotools can't handle header files output by flex so we have to generate it manually lexer.h: lexer.l $(LEX) -o lexer.c $(AM_LFLAGS) $^ gfs2-utils/gfs2/libgfs2/block_list.c0000664000175000017500000000253312110647577016226 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include #include "libgfs2.h" void gfs2_special_free(struct special_blocks *blist) { struct special_blocks *f; while(!osi_list_empty(&blist->list)) { f = osi_list_entry(blist->list.next, struct special_blocks, list); osi_list_del(&f->list); free(f); } } struct special_blocks *blockfind(struct special_blocks *blist, uint64_t num) { osi_list_t *head = &blist->list; osi_list_t *tmp; struct special_blocks *b; for (tmp = head->next; tmp != head; tmp = tmp->next) { b = osi_list_entry(tmp, struct special_blocks, list); if (b->block == num) return b; } return NULL; } void gfs2_special_add(struct special_blocks *blocklist, uint64_t block) { struct special_blocks *b; b = malloc(sizeof(struct special_blocks)); if (b) { memset(b, 0, sizeof(*b)); b->block = block; osi_list_add_prev(&b->list, &blocklist->list); } } void gfs2_special_set(struct special_blocks *blocklist, uint64_t block) { if (blockfind(blocklist, block)) return; gfs2_special_add(blocklist, block); } void gfs2_special_clear(struct special_blocks *blocklist, uint64_t block) { struct special_blocks *b; b = blockfind(blocklist, block); if (b) { osi_list_del(&b->list); free(b); } } gfs2-utils/gfs2/libgfs2/buf.c0000664000175000017500000000443512144433405014646 0ustar andyandy#include "clusterautoconfig.h" #include #include #include 
#include #include #include #include #include #include #include #include #include #include "libgfs2.h" struct gfs2_buffer_head *bget(struct gfs2_sbd *sdp, uint64_t num) { struct gfs2_buffer_head *bh; bh = calloc(1, sizeof(struct gfs2_buffer_head) + sdp->bsize); if (bh == NULL) return NULL; bh->b_blocknr = num; bh->sdp = sdp; bh->iov.iov_base = (char *)bh + sizeof(struct gfs2_buffer_head); bh->iov.iov_len = sdp->bsize; return bh; } int __breadm(struct gfs2_sbd *sdp, struct gfs2_buffer_head **bhs, size_t n, uint64_t block, int line, const char *caller) { struct iovec *iov = alloca(n * sizeof(struct iovec)); struct iovec *iovbase = iov; uint64_t b = block; size_t size = 0; size_t i; int ret; for (i = 0; i < n; i++) { bhs[i] = bget(sdp, b++); if (bhs[i] == NULL) return -1; *iov++ = bhs[i]->iov; size += bhs[i]->iov.iov_len; } ret = preadv(sdp->device_fd, iovbase, n, block * sdp->bsize); if (ret != size) { fprintf(stderr, "bad read: %s from %s:%d: block " "%llu (0x%llx)\n", strerror(errno), caller, line, (unsigned long long)block, (unsigned long long)block); exit(-1); } return 0; } struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num, int line, const char *caller) { struct gfs2_buffer_head *bh; int ret; ret = __breadm(sdp, &bh, 1, num, line, caller); if (ret >= 0) return bh; return NULL; } int bwrite(struct gfs2_buffer_head *bh) { struct gfs2_sbd *sdp = bh->sdp; if (pwritev(sdp->device_fd, &bh->iov, 1, bh->b_blocknr * sdp->bsize) != bh->iov.iov_len) return -1; bh->b_modified = 0; return 0; } int brelse(struct gfs2_buffer_head *bh) { int error = 0; if (bh->b_blocknr == -1) printf("Double free!\n"); if (bh->b_modified) error = bwrite(bh); bh->b_blocknr = -1; if (bh->b_altlist.next && !osi_list_empty(&bh->b_altlist)) osi_list_del(&bh->b_altlist); free(bh); return error; } uint32_t lgfs2_get_block_type(const struct gfs2_buffer_head *lbh) { const struct gfs2_meta_header *mh = lbh->iov.iov_base; if (be32_to_cpu(mh->mh_magic) == GFS2_MAGIC) return 
be32_to_cpu(mh->mh_type); return 0; } gfs2-utils/gfs2/libgfs2/device_geometry.c0000664000175000017500000000417712113704007017242 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libgfs2.h" #ifndef BLKSSZGET #define BLKSSZGET _IO(0x12,104) /* logical_block_size */ #endif #ifndef BLKIOMIN #define BLKIOMIN _IO(0x12,120) /* minimum_io_size */ #endif #ifndef BLKIOOPT #define BLKIOOPT _IO(0x12,121) /* optimal_io_size */ #endif #ifndef BLKALIGNOFF #define BLKALIGNOFF _IO(0x12,122) /* alignment_offset */ #endif #ifndef BLKPBSZGET #define BLKPBSZGET _IO(0x12,123) /* physical_block_size */ #endif int lgfs2_get_dev_info(int fd, struct lgfs2_dev_info *i) { int ret; int ro = 0; off_t off; memset(i, 0, sizeof(*i)); ret = fstat(fd, &i->stat); if (ret < 0) return ret; switch (i->stat.st_mode & S_IFMT) { case S_IFREG: i->size = i->stat.st_size; ret = fcntl(fd, F_GETFL, 0); if (ret & O_RDONLY) i->readonly = 1; i->io_optimal_size = i->stat.st_blksize; goto size_check; case S_IFBLK: break; default: errno = ENOTBLK; return -1; } ioctl(fd, BLKRAGET, &i->ra_pages); ioctl(fd, BLKBSZGET, &i->soft_block_size); ioctl(fd, BLKSSZGET, &i->logical_block_size); ioctl(fd, BLKIOMIN, &i->io_min_size); ioctl(fd, BLKIOOPT, &i->io_optimal_size); ioctl(fd, BLKPBSZGET, &i->physical_block_size); ioctl(fd, BLKALIGNOFF, &i->io_align_offset); ioctl(fd, BLKROGET, &ro); if (ro) i->readonly = 1; off = lseek(fd, 0, SEEK_END); if (off < 0) return -1; i->size = off; size_check: if (i->size < (1 << 20)) { errno = ENOSPC; return -1; } return 0; } /** * fix_device_geometry - round off address and lengths and convert to FS blocks * @sdp: The super block * */ void fix_device_geometry(struct gfs2_sbd *sdp) { struct device *device = &sdp->device; device->length = sdp->dinfo.size / sdp->bsize; if (sdp->debug) { printf("\nDevice Geometry: (in FS blocks)\n"); printf(" length = %"PRIu64"\n", 
device->length); printf("\nDevice Size: %"PRIu64"\n", sdp->dinfo.size); } } gfs2-utils/gfs2/libgfs2/fs_bits.c0000664000175000017500000001346512110647577015540 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include "libgfs2.h" #if BITS_PER_LONG == 32 #define LBITMASK (0x55555555UL) #define LBITSKIP55 (0x55555555UL) #define LBITSKIP00 (0x00000000UL) #else #define LBITMASK (0x5555555555555555UL) #define LBITSKIP55 (0x5555555555555555UL) #define LBITSKIP00 (0x0000000000000000UL) #endif #define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) /** * gfs2_bit_search * @ptr: Pointer to bitmap data * @mask: Mask to use (normally 0x55555.... but adjusted for search start) * @state: The state we are searching for * * We xor the bitmap data with a patter which is the bitwise opposite * of what we are looking for, this gives rise to a pattern of ones * wherever there is a match. Since we have two bits per entry, we * take this pattern, shift it down by one place and then and it with * the original. All the even bit positions (0,2,4, etc) then represent * successful matches, so we mask with 0x55555..... to remove the unwanted * odd bit positions. * * This allows searching of a whole u64 at once (32 blocks) with a * single test (on 64 bit arches). 
*/ static inline uint64_t gfs2_bit_search(const unsigned long long *ptr, unsigned long long mask, uint8_t state) { unsigned long long tmp; /* search[state] is the bitwise complement of the two-bit state repeated across the word */ static const unsigned long long search[] = { [0] = 0xffffffffffffffffULL, [1] = 0xaaaaaaaaaaaaaaaaULL, [2] = 0x5555555555555555ULL, [3] = 0x0000000000000000ULL, }; /* after the xor every entry equal to state reads 11 */ tmp = le64_to_cpu(*ptr) ^ search[state]; /* keep the low bit of an entry only if both of its bits are set */ tmp &= (tmp >> 1); tmp &= mask; return tmp; } /** * gfs2_bitfit - Find the first block in a given state in the bitmaps * @buf: the buffer that holds the bitmaps * @len: the length (in bytes) of the buffer * @goal: the block number at which the search starts * @state: the two-bit block state to search for (0 to 3) * * Nothing is modified; the bitmap is only searched. * * Return: the number of the first matching block at or after @goal, * BFITNOENT if there is no such block, or 0 if @state is greater than 3 */ unsigned long gfs2_bitfit(const unsigned char *buf, const unsigned int len, unsigned long goal, unsigned char state) { /* bit position of goal's entry within its 64-bit word (two bits per entry) */ unsigned long spoint = (goal << 1) & ((8 * sizeof(unsigned long long)) - 1); /* 32 two-bit entries per 64-bit word, hence goal >> 5 */ const unsigned long long *ptr = ((unsigned long long *)buf) + (goal >> 5); const unsigned long long *end = (unsigned long long *) (buf + ALIGN(len, sizeof(unsigned long long))); unsigned long long tmp; unsigned long long mask = 0x5555555555555555ULL; unsigned long bit; if (state > 3) return 0; /* Mask off bits we don't care about at the start of the search */ mask <<= spoint; tmp = gfs2_bit_search(ptr, mask, state); ptr++; while(tmp == 0 && ptr < end) { tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state); ptr++; } /* Mask off any bits which are more than len bytes from the start */ if (ptr == end && (len & (sizeof(unsigned long long) - 1))) tmp &= (((unsigned long long)~0) >> (64 - 8 * (len & (sizeof(unsigned long long) - 1)))); /* Didn't find anything, so return */ if (tmp == 0) return BFITNOENT; /* ptr was advanced past the word that produced tmp; step back to it */ ptr--; /* ffsll() is 1-based and matches sit on even bit positions, so halving gives the 0-based entry index */ bit = ffsll(tmp); bit /= 2; /* two bits per entry in the bitmap */ return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit; } /* * gfs2_check_range - check if blkno is within FS limits * @sdp: super block * @blkno: block number * * Returns: 0 if ok, -1 if out of bounds */ int gfs2_check_range(struct
gfs2_sbd *sdp, uint64_t blkno) { if((blkno > sdp->fssize) || (blkno <= sdp->sb_addr)) return -1; return 0; } /* * gfs2_set_bitmap * @sdp: super block * @blkno: block number relative to file system * @state: one of three possible states * * This function sets the value of a bit of the * file system bitmap. * * Returns: 0 on success, -1 on error */ int gfs2_set_bitmap(struct gfs2_sbd *sdp, uint64_t blkno, int state) { int buf; uint32_t rgrp_block; struct gfs2_bitmap *bits = NULL; struct rgrp_tree *rgd; unsigned char *byte, cur_state; unsigned int bit; /* FIXME: should GFS2_BLKST_INVALID be allowed */ if ((state < GFS2_BLKST_FREE) || (state > GFS2_BLKST_DINODE)) return -1; rgd = gfs2_blk2rgrpd(sdp, blkno); if(!rgd || blkno < rgd->ri.ri_data0) return -1; rgrp_block = (uint32_t)(blkno - rgd->ri.ri_data0); for(buf= 0; buf < rgd->ri.ri_length; buf++){ bits = &(rgd->bits[buf]); if(rgrp_block < ((bits->bi_start + bits->bi_len)*GFS2_NBBY)) break; } if (bits == NULL) return -1; byte = (unsigned char *)(rgd->bh[buf]->b_data + bits->bi_offset) + (rgrp_block/GFS2_NBBY - bits->bi_start); bit = (rgrp_block % GFS2_NBBY) * GFS2_BIT_SIZE; cur_state = (*byte >> bit) & GFS2_BIT_MASK; *byte ^= cur_state << bit; *byte |= state << bit; bmodified(rgd->bh[buf]); return 0; } /* * gfs2_get_bitmap - get value of FS bitmap * @sdp: super block * @blkno: block number relative to file system * * This function gets the value of a bit of the * file system bitmap. 
* Possible state values for a block in the bitmap are: * GFS_BLKST_FREE (0) * GFS_BLKST_USED (1) * GFS_BLKST_INVALID (2) * GFS_BLKST_DINODE (3) * * Returns: state on success, -1 on error */ int lgfs2_get_bitmap(struct gfs2_sbd *sdp, uint64_t blkno, struct rgrp_tree *rgd) { uint64_t offset; uint32_t i = 0; char *byte; unsigned int bit; if (rgd == NULL) { rgd = gfs2_blk2rgrpd(sdp, blkno); if(rgd == NULL) return -1; } offset = blkno - rgd->ri.ri_data0; if (offset > UINT_MAX) { errno = EINVAL; return -1; } if (offset >= rgd->ri.ri_data0 + rgd->ri.ri_data) { errno = E2BIG; return -1; } if (offset >= (rgd->bits->bi_start + rgd->bits->bi_len) * GFS2_NBBY) { offset += (sizeof(struct gfs2_rgrp) - sizeof(struct gfs2_meta_header)) * GFS2_NBBY; i = offset / sdp->sd_blocks_per_bitmap; offset -= i * sdp->sd_blocks_per_bitmap; } if (!rgd->bh || !rgd->bh[i]) return GFS2_BLKST_FREE; byte = (rgd->bh[i]->b_data + rgd->bits[i].bi_offset) + (offset/GFS2_NBBY); bit = (offset % GFS2_NBBY) * GFS2_BIT_SIZE; return (*byte >> bit) & GFS2_BIT_MASK; } gfs2-utils/gfs2/libgfs2/fs_geometry.c0000664000175000017500000001442212144433405016412 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include "libgfs2.h" #define DIV_RU(x, y) (((x) + (y) - 1) / (y)) /** * how_many_rgrps - figure out how many RG to put in a subdevice * @w: the command line * @dev: the device * * Returns: the number of RGs */ uint64_t how_many_rgrps(struct gfs2_sbd *sdp, struct device *dev, int rgsize_specified) { uint64_t nrgrp; uint32_t rgblocks1, rgblocksn, bitblocks1, bitblocksn; int bitmap_overflow = 0; while (TRUE) { nrgrp = DIV_RU(dev->length, (sdp->rgsize << 20) / sdp->bsize); /* check to see if the rg length overflows max # bitblks */ bitblocksn = rgblocks2bitblocks(sdp->bsize, dev->length / nrgrp, &rgblocksn); /* calculate size of the first rgrp */ bitblocks1 = rgblocks2bitblocks(sdp->bsize, dev->length - (nrgrp - 1) * 
(dev->length / nrgrp), &rgblocks1); if (bitblocks1 > 2149 || bitblocksn > 2149) { bitmap_overflow = 1; if (sdp->rgsize <= GFS2_DEFAULT_RGSIZE) { fprintf(stderr, "error: It is not possible " "to use the entire device with " "block size %u bytes.\n", sdp->bsize); exit(-1); } sdp->rgsize -= GFS2_DEFAULT_RGSIZE; /* smaller rgs */ continue; } if (bitmap_overflow || rgsize_specified || /* If user specified an rg size or */ nrgrp <= GFS2_EXCESSIVE_RGS || /* not an excessive # or */ sdp->rgsize >= 2048) /* we reached the max rg size */ break; sdp->rgsize += GFS2_DEFAULT_RGSIZE; /* bigger rgs */ } if (sdp->debug) printf(" rg sz = %"PRIu32"\n nrgrp = %"PRIu64"\n", sdp->rgsize, nrgrp); return nrgrp; } /** * compute_rgrp_layout - figure out where the RG in a FS are * @w: the command line * * Returns: a list of rgrp_list_t structures */ void compute_rgrp_layout(struct gfs2_sbd *sdp, struct osi_root *rgtree, int rgsize_specified) { struct device *dev; struct rgrp_tree *rl, *rlast = NULL; struct osi_node *n, *next = NULL; unsigned int rgrp = 0, nrgrp, rglength; uint64_t rgaddr; sdp->new_rgrps = 0; dev = &sdp->device; /* If this is a new file system, compute the length and number */ /* of rgs based on the size of the device. */ /* If we have existing RGs (i.e. gfs2_grow) find the last one. 
*/ if (!rgtree->osi_node) { dev->length -= sdp->sb_addr + 1; nrgrp = how_many_rgrps(sdp, dev, rgsize_specified); rglength = dev->length / nrgrp; sdp->new_rgrps = nrgrp; } else { uint64_t old_length, new_chunk; log_info("Existing resource groups:\n"); for (rgrp = 0, n = osi_first(rgtree); n; n = next, rgrp++) { next = osi_next(n); rl = (struct rgrp_tree *)n; log_info("%d: start: %" PRIu64 " (0x%" PRIx64 "), length = %"PRIu64" (0x%" PRIx64 ")\n", rgrp + 1, rl->start, rl->start, rl->length, rl->length); rlast = rl; } rlast->start = rlast->ri.ri_addr; rglength = rgrp_size(rlast); rlast->length = rglength; old_length = rlast->ri.ri_addr + rglength; new_chunk = dev->length - old_length; sdp->new_rgrps = new_chunk / rglength; nrgrp = rgrp + sdp->new_rgrps; } if (rgrp < nrgrp) log_info("\nNew resource groups:\n"); for (; rgrp < nrgrp; rgrp++) { if (rgrp) { rgaddr = rlast->start + rlast->length; rl = rgrp_insert(rgtree, rgaddr); rl->length = rglength; } else { rgaddr = sdp->sb_addr + 1; rl = rgrp_insert(rgtree, rgaddr); rl->length = dev->length - (nrgrp - 1) * (dev->length / nrgrp); } rl->start = rgaddr; log_info("%d: start: %" PRIu64 " (0x%" PRIx64 "), length = %"PRIu64" (0x%" PRIx64 ")\n", rgrp + 1, rl->start, rl->start, rl->length, rl->length); rlast = rl; } sdp->rgrps = nrgrp; } /** * Given a number of blocks in a resource group, return the number of blocks * needed for bitmaps. Also calculate the adjusted number of free data blocks * in the resource group and store it in *ri_data. 
*/ uint32_t rgblocks2bitblocks(const unsigned int bsize, const uint32_t rgblocks, uint32_t *ri_data) { uint32_t mappable = 0; uint32_t bitblocks = 0; /* Number of blocks mappable by bitmap blocks with these header types */ const uint32_t blks_rgrp = GFS2_NBBY * (bsize - sizeof(struct gfs2_rgrp)); const uint32_t blks_meta = GFS2_NBBY * (bsize - sizeof(struct gfs2_meta_header)); while (blks_rgrp + (blks_meta * bitblocks) < ((rgblocks - bitblocks) & ~(uint32_t)3)) bitblocks++; if (bitblocks > 0) mappable = blks_rgrp + (blks_meta * (bitblocks - 1)); *ri_data = (rgblocks - (bitblocks + 1)) & ~(uint32_t)3; if (mappable < *ri_data) bitblocks++; return bitblocks; } /** * build_rgrps - write a bunch of resource groups to disk. * If fd > 0, write the data to the given file handle. * Otherwise, use gfs2 buffering in buf.c. */ void build_rgrps(struct gfs2_sbd *sdp, int do_write) { struct osi_node *n, *next = NULL; struct rgrp_tree *rl; uint32_t rgblocks, bitblocks; struct gfs2_rindex *ri; struct gfs2_meta_header mh; unsigned int x; mh.mh_magic = GFS2_MAGIC; mh.mh_type = GFS2_METATYPE_RB; mh.mh_format = GFS2_FORMAT_RB; if (do_write) n = osi_first(&sdp->rgtree); else n = osi_first(&sdp->rgcalc); for (; n; n = next) { next = osi_next(n); rl = (struct rgrp_tree *)n; ri = &rl->ri; bitblocks = rgblocks2bitblocks(sdp->bsize, rl->length, &rgblocks); ri->ri_addr = rl->start; ri->ri_length = bitblocks; ri->ri_data0 = rl->start + bitblocks; ri->ri_data = rgblocks; ri->ri_bitbytes = rgblocks / GFS2_NBBY; memset(&rl->rg, 0, sizeof(rl->rg)); rl->rg.rg_header.mh_magic = GFS2_MAGIC; rl->rg.rg_header.mh_type = GFS2_METATYPE_RG; rl->rg.rg_header.mh_format = GFS2_FORMAT_RG; rl->rg.rg_free = rgblocks; if (gfs2_compute_bitstructs(sdp->sd_sb.sb_bsize, rl)) { fprintf(stderr, "%s: Unable to build resource groups " "with these characteristics.\n", __FUNCTION__); exit(-1); } if (do_write) { for (x = 0; x < bitblocks; x++) { rl->bh[x] = bget(sdp, rl->start + x); if (x) gfs2_meta_header_out_bh(&mh, 
rl->bh[x]); else gfs2_rgrp_out_bh(&rl->rg, rl->bh[x]); } } if (sdp->debug) { printf("\n"); gfs2_rindex_print(ri); } sdp->blks_total += rgblocks; sdp->fssize = ri->ri_data0 + ri->ri_data; } } gfs2-utils/gfs2/libgfs2/fs_ops.c0000664000175000017500000012642512157554250015375 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include "libgfs2.h" #define DATA (1) #define META (2) #define DINODE (3) static __inline__ uint64_t *metapointer(struct gfs2_buffer_head *bh, unsigned int height, struct metapath *mp) { unsigned int head_size = (height > 0) ? sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode); return ((uint64_t *)(bh->b_data + head_size)) + mp->mp_list[height]; } /* Detect directory is a stuffed inode */ static int inode_is_stuffed(struct gfs2_inode *ip) { return !ip->i_di.di_height; } struct gfs2_inode *lgfs2_inode_get(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh) { struct gfs2_inode *ip; ip = calloc(1, sizeof(struct gfs2_inode)); if (ip == NULL) { return NULL; } gfs2_dinode_in(&ip->i_di, bh); ip->i_bh = bh; ip->i_sbd = sdp; return ip; } struct gfs2_inode *lgfs2_inode_read(struct gfs2_sbd *sdp, uint64_t di_addr) { struct gfs2_inode *ip; struct gfs2_buffer_head *bh = bread(sdp, di_addr); if (bh == NULL) { return NULL; } ip = lgfs2_inode_get(sdp, bh); if (ip == NULL) { brelse(bh); return NULL; } ip->bh_owned = 1; /* We did the bread so we own the bh */ return ip; } struct gfs2_inode *is_system_inode(struct gfs2_sbd *sdp, uint64_t block) { int j; if (sdp->md.inum && block == sdp->md.inum->i_di.di_num.no_addr) return sdp->md.inum; if (sdp->md.statfs && block == sdp->md.statfs->i_di.di_num.no_addr) return sdp->md.statfs; if (sdp->md.jiinode && block == sdp->md.jiinode->i_di.di_num.no_addr) return sdp->md.jiinode; if (sdp->md.riinode && block == sdp->md.riinode->i_di.di_num.no_addr) return sdp->md.riinode; if (sdp->md.qinode && block == 
sdp->md.qinode->i_di.di_num.no_addr) return sdp->md.qinode; if (sdp->md.pinode && block == sdp->md.pinode->i_di.di_num.no_addr) return sdp->md.pinode; if (sdp->md.rooti && block == sdp->md.rooti->i_di.di_num.no_addr) return sdp->md.rooti; if (sdp->master_dir && block == sdp->master_dir->i_di.di_num.no_addr) return sdp->master_dir; for (j = 0; j < sdp->md.journals; j++) if (sdp->md.journal && sdp->md.journal[j] && block == sdp->md.journal[j]->i_di.di_num.no_addr) return sdp->md.journal[j]; return NULL; } void inode_put(struct gfs2_inode **ip_in) { struct gfs2_inode *ip = *ip_in; uint64_t block = ip->i_di.di_num.no_addr; struct gfs2_sbd *sdp = ip->i_sbd; if (ip->i_bh->b_modified) { gfs2_dinode_out(&ip->i_di, ip->i_bh); if (!ip->bh_owned && is_system_inode(sdp, block)) fprintf(stderr, "Warning: Change made to inode " "were discarded.\n"); /* This is for debugging only: a convenient place to set a breakpoint. This means a system inode was modified but not written. That's not fatal: some places like adjust_inode in gfs2_convert will do this on purpose. It can also point out a coding problem, but we don't want to raise alarm in the users either. */ } if (ip->bh_owned) brelse(ip->i_bh); ip->i_bh = NULL; free(ip); *ip_in = NULL; /* make sure the memory isn't accessed again */ } static int blk_alloc_in_rg(struct gfs2_sbd *sdp, unsigned int type, struct rgrp_tree *rl, uint64_t *blkno) { struct gfs2_rindex *ri; struct gfs2_rgrp *rg; unsigned int block, bn = 0, x = 0, y = 0; unsigned int state; unsigned int release = 0; struct gfs2_buffer_head *bh; if (rl == NULL || rl->rg.rg_free == 0) { errno = ENOSPC; return -1; } if (rl->bh[0] == NULL) { if (gfs2_rgrp_read(sdp, rl) != 0) return -1; release = 1; } ri = &rl->ri; rg = &rl->rg; for (block = 0; block < ri->ri_length; block++) { bh = rl->bh[block]; x = (block) ? 
sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_rgrp); for (; x < sdp->bsize; x++) for (y = 0; y < GFS2_NBBY; y++) { state = (bh->b_data[x] >> (GFS2_BIT_SIZE * y)) & 0x03; if (state == GFS2_BLKST_FREE) goto found; bn++; } } fprintf(stderr, "allocation is broken (1): %"PRIu64" %u\n", (uint64_t)rl->ri.ri_addr, rl->rg.rg_free); goto out_err; found: if (bn >= ri->ri_bitbytes * GFS2_NBBY) { fprintf(stderr, "allocation is broken (2): bn: %u %u rgrp: %"PRIu64 " (0x%" PRIx64 ") Free:%u\n", bn, ri->ri_bitbytes * GFS2_NBBY, (uint64_t)rl->ri.ri_addr, (uint64_t)rl->ri.ri_addr, rl->rg.rg_free); goto out_err; } switch (type) { case DATA: case META: state = GFS2_BLKST_USED; break; case DINODE: state = GFS2_BLKST_DINODE; rg->rg_dinodes++; break; default: fprintf(stderr, "bad state\n"); goto out_err; } bh->b_data[x] &= ~(0x03 << (GFS2_BIT_SIZE * y)); bh->b_data[x] |= state << (GFS2_BIT_SIZE * y); rg->rg_free--; bmodified(bh); if (sdp->gfs1) gfs_rgrp_out((struct gfs_rgrp *)rg, rl->bh[0]); else gfs2_rgrp_out_bh(rg, rl->bh[0]); sdp->blks_alloced++; *blkno = ri->ri_data0 + bn; if (release) gfs2_rgrp_relse(rl); return 0; out_err: if (release) gfs2_rgrp_relse(rl); return -1; } /** * Do not use this function, it's only here until we can kill it. * Use blk_alloc_in_rg directly instead. 
*/ static uint64_t blk_alloc_i(struct gfs2_sbd *sdp, unsigned int type) { int ret; uint64_t blkno = 0; struct osi_node *n = NULL; /* Pick the first resource group in the tree that reports any free blocks */ for (n = osi_first(&sdp->rgtree); n; n = osi_next(n)) { if (((struct rgrp_tree *)n)->rg.rg_free) break; } /* n is NULL here if no group has free blocks; blk_alloc_in_rg fails with ENOSPC for a NULL group */ ret = blk_alloc_in_rg(sdp, type, (struct rgrp_tree *)n, &blkno); if (ret != 0) /* Do what the old blk_alloc_i did */ exit(1); return blkno; } /* Allocate one data block, record it as the inode's data goal and mark the inode buffer modified. Exits the process if no block can be allocated (see blk_alloc_i). */ uint64_t data_alloc(struct gfs2_inode *ip) { uint64_t x; x = blk_alloc_i(ip->i_sbd, DATA); ip->i_di.di_goal_data = x; bmodified(ip->i_bh); return x; } /* Allocate one metadata block, record it as the inode's metadata goal and mark the inode buffer modified. Exits the process if no block can be allocated (see blk_alloc_i). */ uint64_t meta_alloc(struct gfs2_inode *ip) { uint64_t x; x = blk_alloc_i(ip->i_sbd, META); ip->i_di.di_goal_meta = x; bmodified(ip->i_bh); return x; } /** * Allocate a dinode block in a bitmap. In order to plan ahead we look for a * resource group with blksreq free blocks but only allocate the one dinode block. * Returns 0 on success with the allocated block number in *blkno or non-zero otherwise. * sdp->dinodes_alloced is incremented on success. */ int lgfs2_dinode_alloc(struct gfs2_sbd *sdp, const uint64_t blksreq, uint64_t *blkno) { int ret; struct rgrp_tree *rgt = NULL; struct osi_node *n = NULL; for (n = osi_first(&sdp->rgtree); n; n = osi_next(n)) { rgt = (struct rgrp_tree *)n; if (rgt->rg.rg_free >= blksreq) break; } /* NOTE(review): rgt is NULL only when the tree is empty. If no group has blksreq free blocks the loop leaves rgt at the last group and the allocation is attempted there -- confirm this fallback is intended before tightening the check. */ if (rgt == NULL) return -1; ret = blk_alloc_in_rg(sdp, DINODE, rgt, blkno); if (ret == 0) sdp->dinodes_alloced++; return ret; } /* Zero the buffer from byte offset head up to the block size and mark it modified. */ static __inline__ void buffer_clear_tail(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh, int head) { memset(bh->b_data + head, 0, sdp->bsize - head); bmodified(bh); } /* Copy everything behind from_head in from_bh to offset to_head in to_bh, zero the from_head - to_head bytes left over at the end of to_bh and mark to_bh modified. Presumably callers always pass from_head >= to_head -- TODO confirm. */ static __inline__ void buffer_copy_tail(struct gfs2_sbd *sdp, struct gfs2_buffer_head *to_bh, int to_head, struct gfs2_buffer_head *from_bh, int from_head) { memcpy(to_bh->b_data + to_head, from_bh->b_data + from_head, sdp->bsize - from_head); memset(to_bh->b_data + sdp->bsize + to_head - from_head, 0, from_head - to_head); bmodified(to_bh); } void unstuff_dinode(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = ip->i_sbd; struct gfs2_buffer_head *bh; uint64_t block = 0; int isdir =
S_ISDIR(ip->i_di.di_mode) || is_gfs_dir(&ip->i_di); if (ip->i_di.di_size) { if (isdir) { struct gfs2_meta_header mh; block = meta_alloc(ip); bh = bget(sdp, block); mh.mh_magic = GFS2_MAGIC; mh.mh_type = GFS2_METATYPE_JD; mh.mh_format = GFS2_FORMAT_JD; gfs2_meta_header_out_bh(&mh, bh); buffer_copy_tail(sdp, bh, sizeof(struct gfs2_meta_header), ip->i_bh, sizeof(struct gfs2_dinode)); brelse(bh); } else { block = data_alloc(ip); bh = bget(sdp, block); buffer_copy_tail(sdp, bh, 0, ip->i_bh, sizeof(struct gfs2_dinode)); brelse(bh); } } buffer_clear_tail(sdp, ip->i_bh, sizeof(struct gfs2_dinode)); if (ip->i_di.di_size) { *(uint64_t *)(ip->i_bh->b_data + sizeof(struct gfs2_dinode)) = cpu_to_be64(block); /* no need: bmodified(ip->i_bh); buffer_clear_tail does it */ ip->i_di.di_blocks++; } ip->i_di.di_height = 1; } unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size) { struct gfs2_sbd *sdp = ip->i_sbd; uint64_t *arr; unsigned int max, height; if (ip->i_di.di_size > size) size = ip->i_di.di_size; if (S_ISDIR(ip->i_di.di_mode)) { arr = sdp->sd_jheightsize; max = sdp->sd_max_jheight; } else { arr = sdp->sd_heightsize; max = sdp->sd_max_height; } for (height = 0; height < max; height++) if (arr[height] >= size) break; return height; } void build_height(struct gfs2_inode *ip, int height) { struct gfs2_sbd *sdp = ip->i_sbd; struct gfs2_buffer_head *bh; uint64_t block = 0, *bp; unsigned int x; int new_block; while (ip->i_di.di_height < height) { new_block = FALSE; bp = (uint64_t *)(ip->i_bh->b_data + sizeof(struct gfs2_dinode)); for (x = 0; x < sdp->sd_diptrs; x++, bp++) if (*bp) { new_block = TRUE; break; } if (new_block) { struct gfs2_meta_header mh; block = meta_alloc(ip); bh = bget(sdp, block); mh.mh_magic = GFS2_MAGIC; mh.mh_type = GFS2_METATYPE_IN; mh.mh_format = GFS2_FORMAT_IN; gfs2_meta_header_out_bh(&mh, bh); buffer_copy_tail(sdp, bh, sizeof(struct gfs2_meta_header), ip->i_bh, sizeof(struct gfs2_dinode)); brelse(bh); } buffer_clear_tail(sdp, ip->i_bh, 
sizeof(struct gfs2_dinode)); if (new_block) { *(uint64_t *)(ip->i_bh->b_data + sizeof(struct gfs2_dinode)) = cpu_to_be64(block); /* no need: bmodified(ip->i_bh);*/ ip->i_di.di_blocks++; } ip->i_di.di_height++; } } struct metapath *find_metapath(struct gfs2_inode *ip, uint64_t block) { struct gfs2_sbd *sdp = ip->i_sbd; struct metapath *mp; uint64_t b = block; unsigned int i; mp = calloc(1, sizeof(struct metapath)); if (mp == NULL) { fprintf(stderr, "Out of memory in %s\n", __FUNCTION__); exit(-1); } for (i = ip->i_di.di_height; i--;) { mp->mp_list[i] = b % sdp->sd_inptrs; b /= sdp->sd_inptrs; } return mp; } void lookup_block(struct gfs2_inode *ip, struct gfs2_buffer_head *bh, unsigned int height, struct metapath *mp, int create, int *new, uint64_t *block) { uint64_t *ptr = metapointer(bh, height, mp); if (*ptr) { *block = be64_to_cpu(*ptr); return; } *block = 0; if (!create) return; if (height == ip->i_di.di_height - 1&& !(S_ISDIR(ip->i_di.di_mode))) *block = data_alloc(ip); else *block = meta_alloc(ip); *ptr = cpu_to_be64(*block); bmodified(bh); ip->i_di.di_blocks++; bmodified(ip->i_bh); *new = 1; } void block_map(struct gfs2_inode *ip, uint64_t lblock, int *new, uint64_t *dblock, uint32_t *extlen, int prealloc) { struct gfs2_sbd *sdp = ip->i_sbd; struct gfs2_buffer_head *bh; struct metapath *mp; int create = *new; unsigned int bsize; unsigned int height; unsigned int end_of_metadata; unsigned int x; *new = 0; *dblock = 0; if (extlen) *extlen = 0; if (inode_is_stuffed(ip)) { if (!lblock) { *dblock = ip->i_di.di_num.no_addr; if (extlen) *extlen = 1; } return; } bsize = (S_ISDIR(ip->i_di.di_mode)) ? 
sdp->sd_jbsize : sdp->bsize; height = calc_tree_height(ip, (lblock + 1) * bsize); if (ip->i_di.di_height < height) { if (!create) return; build_height(ip, height); } mp = find_metapath(ip, lblock); end_of_metadata = ip->i_di.di_height - 1; bh = ip->i_bh; for (x = 0; x < end_of_metadata; x++) { lookup_block(ip, bh, x, mp, create, new, dblock); if (bh != ip->i_bh) brelse(bh); if (!*dblock) goto out; if (*new) { struct gfs2_meta_header mh; bh = bget(sdp, *dblock); mh.mh_magic = GFS2_MAGIC; mh.mh_type = GFS2_METATYPE_IN; mh.mh_format = GFS2_FORMAT_IN; gfs2_meta_header_out_bh(&mh, bh); } else { if (*dblock == ip->i_di.di_num.no_addr) bh = ip->i_bh; else bh = bread(sdp, *dblock); } } if (!prealloc) lookup_block(ip, bh, end_of_metadata, mp, create, new, dblock); if (extlen && *dblock) { *extlen = 1; if (!*new) { uint64_t tmp_dblock; int tmp_new; unsigned int nptrs; nptrs = (end_of_metadata) ? sdp->sd_inptrs : sdp->sd_diptrs; while (++mp->mp_list[end_of_metadata] < nptrs) { lookup_block(ip, bh, end_of_metadata, mp, FALSE, &tmp_new, &tmp_dblock); if (*dblock + *extlen != tmp_dblock) break; (*extlen)++; } } } if (bh != ip->i_bh) brelse(bh); out: free(mp); } static void copy2mem(struct gfs2_buffer_head *bh, void **buf, unsigned int offset, unsigned int size) { char **p = (char **)buf; if (bh) memcpy(*p, bh->b_data + offset, size); else memset(*p, 0, size); *p += size; } int gfs2_readi(struct gfs2_inode *ip, void *buf, uint64_t offset, unsigned int size) { struct gfs2_sbd *sdp = ip->i_sbd; struct gfs2_buffer_head *bh; uint64_t lblock, dblock; unsigned int o; uint32_t extlen = 0; unsigned int amount; int not_new = 0; int isdir = !!(S_ISDIR(ip->i_di.di_mode)); int journaled = ip->i_di.di_flags & GFS2_DIF_JDATA; int copied = 0; if (offset >= ip->i_di.di_size) return 0; if ((offset + size) > ip->i_di.di_size) size = ip->i_di.di_size - offset; if (!size) return 0; if ((sdp->gfs1 && journaled) || (!sdp->gfs1 && isdir)) { lblock = offset; o = lblock % sdp->sd_jbsize; lblock /= 
sdp->sd_jbsize; } else { lblock = offset >> sdp->sd_sb.sb_bsize_shift; o = offset & (sdp->bsize - 1); } if (inode_is_stuffed(ip)) o += sizeof(struct gfs2_dinode); else if ((sdp->gfs1 && journaled) || (!sdp->gfs1 && isdir)) o += sizeof(struct gfs2_meta_header); while (copied < size) { amount = size - copied; if (amount > sdp->bsize - o) amount = sdp->bsize - o; if (!extlen) { if (sdp->gfs1) gfs1_block_map(ip, lblock, ¬_new, &dblock, &extlen, FALSE); else block_map(ip, lblock, ¬_new, &dblock, &extlen, FALSE); } if (dblock) { if (dblock == ip->i_di.di_num.no_addr) bh = ip->i_bh; else bh = bread(sdp, dblock); dblock++; extlen--; } else bh = NULL; copy2mem(bh, &buf, o, amount); if (bh && bh != ip->i_bh) brelse(bh); copied += amount; lblock++; if (sdp->gfs1) o = (journaled) ? sizeof(struct gfs2_meta_header) : 0; else o = (isdir) ? sizeof(struct gfs2_meta_header) : 0; } return copied; } static void copy_from_mem(struct gfs2_buffer_head *bh, void **buf, unsigned int offset, unsigned int size) { char **p = (char **)buf; memcpy(bh->b_data + offset, *p, size); bmodified(bh); *p += size; } int __gfs2_writei(struct gfs2_inode *ip, void *buf, uint64_t offset, unsigned int size, int resize) { struct gfs2_sbd *sdp = ip->i_sbd; struct gfs2_buffer_head *bh; uint64_t lblock, dblock; unsigned int o; uint32_t extlen = 0; unsigned int amount; int new; int isdir = !!(S_ISDIR(ip->i_di.di_mode)); const uint64_t start = offset; int copied = 0; if (!size) return 0; if (inode_is_stuffed(ip) && ((start + size) > (sdp->bsize - sizeof(struct gfs2_dinode)))) unstuff_dinode(ip); if (isdir) { lblock = offset; o = lblock % sdp->sd_jbsize; lblock /= sdp->sd_jbsize; } else { lblock = offset >> sdp->sd_sb.sb_bsize_shift; o = offset & (sdp->bsize - 1); } if (inode_is_stuffed(ip)) o += sizeof(struct gfs2_dinode); else if (isdir) o += sizeof(struct gfs2_meta_header); while (copied < size) { amount = size - copied; if (amount > sdp->bsize - o) amount = sdp->bsize - o; if (!extlen) { new = TRUE; 
block_map(ip, lblock, &new, &dblock, &extlen, FALSE); } if (new) { bh = bget(sdp, dblock); if (isdir) { struct gfs2_meta_header mh; mh.mh_magic = GFS2_MAGIC; mh.mh_type = GFS2_METATYPE_JD; mh.mh_format = GFS2_FORMAT_JD; gfs2_meta_header_out_bh(&mh, bh); } } else { if (dblock == ip->i_di.di_num.no_addr) bh = ip->i_bh; else bh = bread(sdp, dblock); } copy_from_mem(bh, &buf, o, amount); if (bh != ip->i_bh) brelse(bh); copied += amount; lblock++; dblock++; extlen--; o = (isdir) ? sizeof(struct gfs2_meta_header) : 0; } if (resize && ip->i_di.di_size < start + copied) { bmodified(ip->i_bh); ip->i_di.di_size = start + copied; } return copied; } struct gfs2_buffer_head *get_file_buf(struct gfs2_inode *ip, uint64_t lbn, int prealloc) { struct gfs2_sbd *sdp = ip->i_sbd; uint64_t dbn; int new = TRUE; if (inode_is_stuffed(ip)) unstuff_dinode(ip); block_map(ip, lbn, &new, &dbn, NULL, prealloc); if (!dbn) { fprintf(stderr, "get_file_buf\n"); exit(1); } if (!prealloc && new && ip->i_di.di_size < (lbn + 1) << sdp->sd_sb.sb_bsize_shift) { bmodified(ip->i_bh); ip->i_di.di_size = (lbn + 1) << sdp->sd_sb.sb_bsize_shift; } if (dbn == ip->i_di.di_num.no_addr) return ip->i_bh; else return bread(sdp, dbn); } int gfs2_dirent_first(struct gfs2_inode *dip, struct gfs2_buffer_head *bh, struct gfs2_dirent **dent) { struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data; if (be32_to_cpu(h->mh_type) == GFS2_METATYPE_LF) { *dent = (struct gfs2_dirent *)(bh->b_data + sizeof(struct gfs2_leaf)); return IS_LEAF; } else { *dent = (struct gfs2_dirent *)(bh->b_data + sizeof(struct gfs2_dinode)); return IS_DINODE; } } int gfs2_dirent_next(struct gfs2_inode *dip, struct gfs2_buffer_head *bh, struct gfs2_dirent **dent) { char *bh_end; uint16_t cur_rec_len; bh_end = bh->b_data + dip->i_sbd->bsize; cur_rec_len = be16_to_cpu((*dent)->de_rec_len); if (cur_rec_len == 0 || (char *)(*dent) + cur_rec_len >= bh_end) return -ENOENT; *dent = (struct gfs2_dirent *)((char *)(*dent) + cur_rec_len); return 0; 
}

/**
 * dirent_alloc - reserve room for a new directory entry inside one block
 * @dip: directory inode; its in-core di_entries is incremented on success
 * @bh: buffer holding a leaf block or the directory's dinode block
 * @name_len: length of the name the new entry must hold
 * @dent_out: on success, the dirent the caller should fill in
 *
 * Only the length fields of the returned dirent are set up here; the
 * caller writes the inode number, hash, type and name.
 *
 * Returns 0 on success, with *dent_out pointing to the new dirent,
 * or -1 on failure, with errno set to ENOSPC.
 */
static int dirent_alloc(struct gfs2_inode *dip, struct gfs2_buffer_head *bh,
			int name_len, struct gfs2_dirent **dent_out)
{
	struct gfs2_dirent *dent, *new;
	unsigned int rec_len = GFS2_DIRENT_SIZE(name_len);
	unsigned int entries = 0, offset = 0;
	int type;

	type = gfs2_dirent_first(dip, bh, &dent);

	/* Entry count and header size are read from the block's own header. */
	if (type == IS_LEAF) {
		struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
		entries = be16_to_cpu(leaf->lf_entries);
		offset = sizeof(struct gfs2_leaf);
	} else {
		struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data;
		entries = be32_to_cpu(dinode->di_entries);
		offset = sizeof(struct gfs2_dinode);
	}

	/* Empty block: the first dirent takes all space after the header. */
	if (!entries) {
		dent->de_rec_len = cpu_to_be16(dip->i_sbd->bsize - offset);
		dent->de_name_len = cpu_to_be16(name_len);
		bmodified(bh);
		*dent_out = dent;
		dip->i_di.di_entries++;
		bmodified(dip->i_bh);
		return 0;
	}

	do {
		uint16_t cur_rec_len;
		uint16_t cur_name_len;
		uint16_t new_rec_len;

		cur_rec_len = be16_to_cpu(dent->de_rec_len);
		cur_name_len = be16_to_cpu(dent->de_name_len);

		/*
		 * Usable if the record is unused (no_formal_ino == 0) and big
		 * enough, or if it has enough slack after its own name to
		 * carve a second record out of it.
		 */
		if ((!dent->de_inum.no_formal_ino && cur_rec_len >= rec_len) ||
		    (cur_rec_len >= GFS2_DIRENT_SIZE(cur_name_len) + rec_len)) {

			if (dent->de_inum.no_formal_ino) {
				/* In use: split the slack off into a new record. */
				new = (struct gfs2_dirent *)((char *)dent +
							    GFS2_DIRENT_SIZE(cur_name_len));
				memset(new, 0, sizeof(struct gfs2_dirent));

				new->de_rec_len = cpu_to_be16(cur_rec_len -
					  GFS2_DIRENT_SIZE(cur_name_len));
				new->de_name_len = cpu_to_be16(name_len);

				/* Shrink the old record by what the new one took. */
				new_rec_len = be16_to_cpu(new->de_rec_len);
				dent->de_rec_len = cpu_to_be16(cur_rec_len - new_rec_len);

				*dent_out = new;
				bmodified(bh);
				dip->i_di.di_entries++;
				bmodified(dip->i_bh);
				return 0;
			}

			/* Unused record: reuse it in place, keeping its length. */
			dent->de_name_len = cpu_to_be16(name_len);

			*dent_out = dent;
			bmodified(bh);
			dip->i_di.di_entries++;
			bmodified(dip->i_bh);
			return 0;
		}
	} while (gfs2_dirent_next(dip, bh, &dent) == 0);

	errno = ENOSPC;
	return -1;
}

/*
 * dirent2_del - remove dirent @cur from the block in @bh.
 * Decrements the leaf's entry count (when @bh is a leaf) and the
 * directory's in-core entry count, then either clears @cur's inode number
 * (no @prev) or folds @cur's record length into @prev.
 */
void dirent2_del(struct gfs2_inode *dip, struct gfs2_buffer_head *bh, struct
gfs2_dirent *prev, struct gfs2_dirent *cur) { uint16_t cur_rec_len, prev_rec_len; bmodified(bh); if (gfs2_check_meta(bh, GFS2_METATYPE_LF) == 0) { struct gfs2_leaf *lf = (struct gfs2_leaf *)bh->b_data; lf->lf_entries = be16_to_cpu(lf->lf_entries) - 1; lf->lf_entries = cpu_to_be16(lf->lf_entries); } if (dip->i_di.di_entries) { bmodified(dip->i_bh); dip->i_di.di_entries--; } if (!prev) { cur->de_inum.no_formal_ino = 0; return; } prev_rec_len = be16_to_cpu(prev->de_rec_len); cur_rec_len = be16_to_cpu(cur->de_rec_len); prev_rec_len += cur_rec_len; prev->de_rec_len = cpu_to_be16(prev_rec_len); } void gfs2_get_leaf_nr(struct gfs2_inode *dip, uint32_t lindex, uint64_t *leaf_out) { uint64_t leaf_no; int count; count = gfs2_readi(dip, (char *)&leaf_no, lindex * sizeof(uint64_t), sizeof(uint64_t)); if (count != sizeof(uint64_t)) { fprintf(stderr, "gfs2_get_leaf_nr: Bad internal read.\n"); exit(1); } *leaf_out = be64_to_cpu(leaf_no); } void gfs2_put_leaf_nr(struct gfs2_inode *dip, uint32_t inx, uint64_t leaf_out) { uint64_t leaf_no; int count; if (dip->i_sbd->gfs1) { gfs_put_leaf_nr(dip, inx, leaf_out); return; } leaf_no = cpu_to_be64(leaf_out); count = gfs2_writei(dip, (char *)&leaf_no, inx * sizeof(uint64_t), sizeof(uint64_t)); if (count != sizeof(uint64_t)) { fprintf(stderr, "gfs2_put_leaf_nr: Bad internal write.\n"); exit(1); } } void dir_split_leaf(struct gfs2_inode *dip, uint32_t start, uint64_t leaf_no, struct gfs2_buffer_head *obh) { struct gfs2_buffer_head *nbh; struct gfs2_leaf *nleaf, *oleaf; struct gfs2_dirent *dent, *prev = NULL, *next = NULL, *new; uint32_t len, half_len, divider; uint64_t bn, *lp; uint32_t name_len; int x, moved = FALSE; int count; bn = meta_alloc(dip); nbh = bget(dip->i_sbd, bn); { struct gfs2_meta_header mh; mh.mh_magic = GFS2_MAGIC; mh.mh_type = GFS2_METATYPE_LF; mh.mh_format = GFS2_FORMAT_LF; gfs2_meta_header_out_bh(&mh, nbh); buffer_clear_tail(dip->i_sbd, nbh, sizeof(struct gfs2_meta_header)); } nleaf = (struct gfs2_leaf *)nbh->b_data; 
/* dir_split_leaf() body: move the low-hash half of leaf obh into the new leaf nbh. */
	nleaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);

	oleaf = (struct gfs2_leaf *)obh->b_data;

	/*
	 * len is 2^(directory depth - leaf depth); half of that many hash
	 * table slots, beginning at 'start', are repointed at the new block.
	 */
	len = 1 << (dip->i_di.di_depth - be16_to_cpu(oleaf->lf_depth));
	half_len = len >> 1;

	lp = calloc(1, half_len * sizeof(uint64_t));
	if (lp == NULL) {
		fprintf(stderr, "Out of memory in %s\n", __FUNCTION__);
		exit(-1);
	}
	for (x = 0; x < half_len; x++)
		lp[x] = cpu_to_be64(bn);

	if (dip->i_sbd->gfs1)
		count = gfs1_writei(dip, (char *)lp, start * sizeof(uint64_t),
				    half_len * sizeof(uint64_t));
	else
		count = gfs2_writei(dip, (char *)lp, start * sizeof(uint64_t),
				    half_len * sizeof(uint64_t));
	if (count != half_len * sizeof(uint64_t)) {
		fprintf(stderr, "dir_split_leaf (2)\n");
		exit(1);
	}

	free(lp);

	/* Entries whose hash is below this value belong in the new leaf. */
	divider = (start + half_len) << (32 - dip->i_di.di_depth);

	gfs2_dirent_first(dip, obh, &dent);

	do {
		/* Fetch the successor first: dent may be deleted below. */
		next = dent;
		if (gfs2_dirent_next(dip, obh, &next))
			next = NULL;

		if (dent->de_inum.no_formal_ino &&
		    be32_to_cpu(dent->de_hash) < divider) {
			name_len = be16_to_cpu(dent->de_name_len);

			if (dirent_alloc(dip, nbh, name_len, &new)) {
				fprintf(stderr, "dir_split_leaf (3)\n");
				exit(1);
			}

			/* Fields are copied raw; both sides are on-disk format. */
			new->de_inum = dent->de_inum;
			new->de_hash = dent->de_hash;
			new->de_type = dent->de_type;
			memcpy((char *)(new + 1), (char *)(dent + 1), name_len);

			nleaf->lf_entries = be16_to_cpu(nleaf->lf_entries) + 1;
			nleaf->lf_entries = cpu_to_be16(nleaf->lf_entries);

			dirent2_del(dip, obh, prev, dent);

			/*
			 * Without a predecessor dirent2_del() only marks dent
			 * unused, so it can serve as predecessor from now on.
			 */
			if (!prev)
				prev = dent;

			moved = TRUE;
		} else
			prev = dent;

		dent = next;
	} while (dent);

	/* Nothing moved: give the new leaf a single unused dirent. */
	if (!moved) {
		if (dirent_alloc(dip, nbh, 0, &new)) {
			fprintf(stderr, "dir_split_leaf (4)\n");
			exit(1);
		}
		new->de_inum.no_formal_ino = 0;
		/* Don't count the sentinel dirent as an entry */
		dip->i_di.di_entries--;
	}

	/* Both leaves end up one level deeper than the original. */
	oleaf->lf_depth = be16_to_cpu(oleaf->lf_depth) + 1;
	oleaf->lf_depth = cpu_to_be16(oleaf->lf_depth);
	nleaf->lf_depth = oleaf->lf_depth;

	dip->i_di.di_blocks++;
	bmodified(dip->i_bh);

	bmodified(obh); /* Need to do this in case nothing was moved */
	bmodified(nbh);
	brelse(nbh);
}

/*
 * dir_double_exhash - double the directory's hash table.
 * Every existing pointer is written out twice in succession, working from
 * the last table block back to the first, and di_depth is incremented.
 * Allocation failures and short reads/writes terminate the process.
 */
static void dir_double_exhash(struct gfs2_inode *dip)
{
	struct
gfs2_sbd *sdp = dip->i_sbd; uint64_t *buf; uint64_t *from, *to; uint64_t block; int x; int count; buf = calloc(1, 3 * sdp->sd_hash_bsize); if (buf == NULL) { fprintf(stderr, "Out of memory in %s\n", __FUNCTION__); exit(-1); } for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) { count = gfs2_readi(dip, (char *)buf, block * sdp->sd_hash_bsize, sdp->sd_hash_bsize); if (count != sdp->sd_hash_bsize) { fprintf(stderr, "dir_double_exhash (1)\n"); exit(1); } from = buf; to = (uint64_t *)((char *)buf + sdp->sd_hash_bsize); for (x = sdp->sd_hash_ptrs; x--; from++) { *to++ = *from; *to++ = *from; } if (sdp->gfs1) count = gfs1_writei(dip, (char *)buf + sdp->sd_hash_bsize, block * sdp->bsize, sdp->bsize); else count = gfs2_writei(dip, (char *)buf + sdp->sd_hash_bsize, block * sdp->bsize, sdp->bsize); if (count != sdp->bsize) { fprintf(stderr, "dir_double_exhash (2)\n"); exit(1); } } free(buf); dip->i_di.di_depth++; bmodified(dip->i_bh); } /** * get_leaf - Get leaf * @dip: * @leaf_no: * @bh_out: * * Returns: 0 on success, error code otherwise */ int gfs2_get_leaf(struct gfs2_inode *dip, uint64_t leaf_no, struct gfs2_buffer_head **bhp) { int error = 0; *bhp = bread(dip->i_sbd, leaf_no); error = gfs2_check_meta(*bhp, GFS2_METATYPE_LF); if(error) brelse(*bhp); return error; } /** * get_first_leaf - Get first leaf * @dip: The GFS2 inode * @index: * @bh_out: * * Returns: 0 on success, error code otherwise */ static int get_first_leaf(struct gfs2_inode *dip, uint32_t lindex, struct gfs2_buffer_head **bh_out) { uint64_t leaf_no; gfs2_get_leaf_nr(dip, lindex, &leaf_no); *bh_out = bread(dip->i_sbd, leaf_no); return 0; } /** * get_next_leaf - Get next leaf * @dip: The GFS2 inode * @bh_in: The buffer * @bh_out: * * Returns: 0 on success, error code otherwise */ static int get_next_leaf(struct gfs2_inode *dip,struct gfs2_buffer_head *bh_in, struct gfs2_buffer_head **bh_out) { struct gfs2_leaf *leaf; leaf = (struct gfs2_leaf *)bh_in->b_data; if (!leaf->lf_next) return -1; 
*bh_out = bread(dip->i_sbd, be64_to_cpu(leaf->lf_next)); return 0; } static int dir_e_add(struct gfs2_inode *dip, const char *filename, int len, struct gfs2_inum *inum, unsigned int type) { struct gfs2_buffer_head *bh, *nbh; struct gfs2_leaf *leaf, *nleaf; struct gfs2_dirent *dent; uint32_t lindex, llen; uint32_t hash; uint64_t leaf_no, bn; int err = 0; hash = gfs2_disk_hash(filename, len); restart: /* Have to kludge because (hash >> 32) gives hash for some reason. */ if (dip->i_di.di_depth) lindex = hash >> (32 - dip->i_di.di_depth); else lindex = 0; gfs2_get_leaf_nr(dip, lindex, &leaf_no); for (;;) { bh = bread(dip->i_sbd, leaf_no); leaf = (struct gfs2_leaf *)bh->b_data; if (dirent_alloc(dip, bh, len, &dent)) { if (be16_to_cpu(leaf->lf_depth) < dip->i_di.di_depth) { llen = 1 << (dip->i_di.di_depth - be16_to_cpu(leaf->lf_depth)); dir_split_leaf(dip, lindex & ~(llen - 1), leaf_no, bh); brelse(bh); goto restart; } else if (dip->i_di.di_depth < GFS2_DIR_MAX_DEPTH) { brelse(bh); dir_double_exhash(dip); goto restart; } else if (leaf->lf_next) { leaf_no = be64_to_cpu(leaf->lf_next); brelse(bh); continue; } else { struct gfs2_meta_header mh; bn = meta_alloc(dip); nbh = bget(dip->i_sbd, bn); mh.mh_magic = GFS2_MAGIC; mh.mh_type = GFS2_METATYPE_LF; mh.mh_format = GFS2_FORMAT_LF; gfs2_meta_header_out_bh(&mh, nbh); leaf->lf_next = cpu_to_be64(bn); nleaf = (struct gfs2_leaf *)nbh->b_data; nleaf->lf_depth = leaf->lf_depth; nleaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE); err = dirent_alloc(dip, nbh, len, &dent); if (err) return err; dip->i_di.di_blocks++; bmodified(dip->i_bh); bmodified(bh); brelse(bh); bh = nbh; leaf = nleaf; } } gfs2_inum_out(inum, (char *)&dent->de_inum); dent->de_hash = cpu_to_be32(hash); dent->de_type = cpu_to_be16(type); memcpy((char *)(dent + 1), filename, len); leaf->lf_entries = be16_to_cpu(leaf->lf_entries) + 1; leaf->lf_entries = cpu_to_be16(leaf->lf_entries); bmodified(bh); brelse(bh); return err; } } static void dir_make_exhash(struct 
gfs2_inode *dip)
{
	struct gfs2_sbd *sdp = dip->i_sbd;
	struct gfs2_dirent *dent;
	struct gfs2_buffer_head *bh;
	struct gfs2_leaf *leaf;
	int y;
	uint32_t x;
	uint64_t *lp, bn;

	/* Allocate and stamp the leaf that will receive the entries. */
	bn = meta_alloc(dip);
	bh = bget(sdp, bn);
	{
		struct gfs2_meta_header mh;
		mh.mh_magic = GFS2_MAGIC;
		mh.mh_type = GFS2_METATYPE_LF;
		mh.mh_format = GFS2_FORMAT_LF;
		gfs2_meta_header_out_bh(&mh, bh);
	}

	leaf = (struct gfs2_leaf *)bh->b_data;
	leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);
	leaf->lf_entries = cpu_to_be16(dip->i_di.di_entries);

	/* Copy everything after the dinode header to after the leaf header. */
	buffer_copy_tail(sdp, bh, sizeof(struct gfs2_leaf),
			 dip->i_bh, sizeof(struct gfs2_dinode));

	/* Walk to the di_entries-th in-use dirent (or the last one found). */
	x = 0;
	gfs2_dirent_first(dip, bh, &dent);

	do {
		if (!dent->de_inum.no_formal_ino)
			continue;
		if (++x == dip->i_di.di_entries)
			break;
	} while (gfs2_dirent_next(dip, bh, &dent) == 0);

	/*
	 * Lengthen that dirent by the difference between the two header
	 * sizes (presumably so it again reaches the end of the block -
	 * confirm against the on-disk structure sizes).
	 */
	dent->de_rec_len = be16_to_cpu(dent->de_rec_len);
	dent->de_rec_len = cpu_to_be16(dent->de_rec_len +
		sizeof(struct gfs2_dinode) - sizeof(struct gfs2_leaf));

	/* no need to: bmodified(bh); (buffer_copy_tail does it) */
	brelse(bh);

	/* Replace the dinode's old contents with pointers to the new leaf. */
	buffer_clear_tail(sdp, dip->i_bh, sizeof(struct gfs2_dinode));

	lp = (uint64_t *)(dip->i_bh->b_data + sizeof(struct gfs2_dinode));

	for (x = sdp->sd_hash_ptrs; x--; lp++)
		*lp = cpu_to_be64(bn);

	dip->i_di.di_size = sdp->bsize / 2;
	dip->i_di.di_blocks++;
	dip->i_di.di_flags |= GFS2_DIF_EXHASH;
	dip->i_di.di_payload_format = 0;
	/* no need: bmodified(dip->i_bh); buffer_clear_tail does it.
*/ for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; dip->i_di.di_depth = y; gfs2_dinode_out(&dip->i_di, dip->i_bh); bwrite(dip->i_bh); } static int dir_l_add(struct gfs2_inode *dip, const char *filename, int len, struct gfs2_inum *inum, unsigned int type) { struct gfs2_dirent *dent; int err = 0; if (dirent_alloc(dip, dip->i_bh, len, &dent)) { dir_make_exhash(dip); err = dir_e_add(dip, filename, len, inum, type); return err; } gfs2_inum_out(inum, (char *)&dent->de_inum); dent->de_hash = gfs2_disk_hash(filename, len); dent->de_hash = cpu_to_be32(dent->de_hash); dent->de_type = cpu_to_be16(type); memcpy((char *)(dent + 1), filename, len); bmodified(dip->i_bh); return err; } int dir_add(struct gfs2_inode *dip, const char *filename, int len, struct gfs2_inum *inum, unsigned int type) { int err = 0; if (dip->i_di.di_flags & GFS2_DIF_EXHASH) err = dir_e_add(dip, filename, len, inum, type); else err = dir_l_add(dip, filename, len, inum, type); return err; } static struct gfs2_buffer_head *__init_dinode(struct gfs2_sbd *sdp, struct gfs2_inum *inum, unsigned int mode, uint32_t flags, struct gfs2_inum *parent, int gfs1) { struct gfs2_buffer_head *bh; struct gfs2_dinode di; int is_dir; if (gfs1) is_dir = (IF2DT(mode) == GFS_FILE_DIR); else is_dir = S_ISDIR(mode); bh = bget(sdp, inum->no_addr); memset(&di, 0, sizeof(struct gfs2_dinode)); di.di_header.mh_magic = GFS2_MAGIC; di.di_header.mh_type = GFS2_METATYPE_DI; di.di_header.mh_format = GFS2_FORMAT_DI; di.di_num = *inum; di.di_mode = mode; di.di_nlink = 1; di.di_blocks = 1; di.di_atime = di.di_mtime = di.di_ctime = sdp->time; di.di_goal_meta = di.di_goal_data = bh->b_blocknr; di.di_flags = flags; if (is_dir) { struct gfs2_dirent de1, de2; memset(&de1, 0, sizeof(struct gfs2_dirent)); de1.de_inum = di.di_num; de1.de_hash = gfs2_disk_hash(".", 1); de1.de_rec_len = GFS2_DIRENT_SIZE(1); de1.de_name_len = 1; de1.de_type = (gfs1 ? 
GFS_FILE_DIR : IF2DT(S_IFDIR)); memset(&de2, 0, sizeof(struct gfs2_dirent)); de2.de_inum = *parent; de2.de_hash = gfs2_disk_hash("..", 2); de2.de_rec_len = sdp->bsize - sizeof(struct gfs2_dinode) - de1.de_rec_len; de2.de_name_len = 2; de2.de_type = (gfs1 ? GFS_FILE_DIR : IF2DT(S_IFDIR)); gfs2_dirent_out(&de1, bh->b_data + sizeof(struct gfs2_dinode)); memcpy(bh->b_data + sizeof(struct gfs2_dinode) + sizeof(struct gfs2_dirent), ".", 1); gfs2_dirent_out(&de2, bh->b_data + sizeof(struct gfs2_dinode) + de1.de_rec_len); memcpy(bh->b_data + sizeof(struct gfs2_dinode) + de1.de_rec_len + sizeof(struct gfs2_dirent), "..", 2); di.di_nlink = 2; di.di_size = sdp->bsize - sizeof(struct gfs2_dinode); di.di_flags |= GFS2_DIF_JDATA; di.di_payload_format = GFS2_FORMAT_DE; di.di_entries = 2; } gfs2_dinode_out(&di, bh); return bh; } struct gfs2_buffer_head *init_dinode(struct gfs2_sbd *sdp, struct gfs2_inum *inum, unsigned int mode, uint32_t flags, struct gfs2_inum *parent) { return __init_dinode(sdp, inum, mode, flags, parent, 0); } static struct gfs2_inode *__createi(struct gfs2_inode *dip, const char *filename, unsigned int mode, uint32_t flags, int if_gfs1) { struct gfs2_sbd *sdp = dip->i_sbd; uint64_t bn; struct gfs2_inum inum; struct gfs2_buffer_head *bh; struct gfs2_inode *ip; int err = 0; int is_dir; gfs2_lookupi(dip, filename, strlen(filename), &ip); if (!ip) { err = lgfs2_dinode_alloc(sdp, 1, &bn); if (err != 0) return NULL; if (if_gfs1) inum.no_formal_ino = bn; else inum.no_formal_ino = sdp->md.next_inum++; inum.no_addr = bn; err = dir_add(dip, filename, strlen(filename), &inum, IF2DT(mode)); if (err) return NULL; if (if_gfs1) is_dir = (IF2DT(mode) == GFS_FILE_DIR); else is_dir = S_ISDIR(mode); if (is_dir) { bmodified(dip->i_bh); dip->i_di.di_nlink++; } bh = __init_dinode(sdp, &inum, mode, flags, &dip->i_di.di_num, if_gfs1); ip = lgfs2_inode_get(sdp, bh); if (ip == NULL) return NULL; bmodified(bh); } ip->bh_owned = 1; return ip; } struct gfs2_inode *createi(struct 
gfs2_inode *dip, const char *filename, unsigned int mode, uint32_t flags) { return __createi(dip, filename, mode, flags, 0); } struct gfs2_inode *gfs_createi(struct gfs2_inode *dip, const char *filename, unsigned int mode, uint32_t flags) { return __createi(dip, filename, mode, flags, 1); } /** * gfs2_filecmp - Compare two filenames * @file1: The first filename * @file2: The second filename * @len_of_file2: The length of the second file * * This routine compares two filenames and returns 1 if they are equal. * * Returns: 1 if the files are the same, otherwise 0. */ static int gfs2_filecmp(const char *file1, const char *file2, int len_of_file2) { if (strlen(file1) != len_of_file2) return 0; if (memcmp(file1, file2, len_of_file2)) return 0; return 1; } /** * leaf_search * @bh: * @id: * @dent_out: * @dent_prev: * * Returns: */ static int leaf_search(struct gfs2_inode *dip, struct gfs2_buffer_head *bh, const char *filename, int len, struct gfs2_dirent **dent_out, struct gfs2_dirent **dent_prev) { uint32_t hash; struct gfs2_dirent *dent, *prev = NULL; unsigned int entries = 0, x = 0; int type; type = gfs2_dirent_first(dip, bh, &dent); if (type == IS_LEAF){ struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data; entries = be16_to_cpu(leaf->lf_entries); } else if (type == IS_DINODE) entries = dip->i_di.di_entries; else return -1; hash = gfs2_disk_hash(filename, len); do{ if (!dent->de_inum.no_formal_ino){ prev = dent; continue; } if (be32_to_cpu(dent->de_hash) == hash && gfs2_filecmp(filename, (char *)(dent + 1), be16_to_cpu(dent->de_name_len))) { *dent_out = dent; if (dent_prev) *dent_prev = prev; return 0; } if(x >= entries) return -1; x++; prev = dent; } while (gfs2_dirent_next(dip, bh, &dent) == 0); return -ENOENT; } /** * linked_leaf_search - Linked leaf search * @dip: The GFS2 inode * @id: * @dent_out: * @dent_prev: * @bh_out: * * Returns: 0 on sucess, error code otherwise */ static int linked_leaf_search(struct gfs2_inode *dip, const char *filename, int len, 
struct gfs2_dirent **dent_out, struct gfs2_buffer_head **bh_out) { struct gfs2_buffer_head *bh = NULL, *bh_next; uint32_t hsize, lindex; uint32_t hash; int error = 0; hsize = 1 << dip->i_di.di_depth; if(hsize * sizeof(uint64_t) != dip->i_di.di_size) return -1; /* Figure out the address of the leaf node. */ hash = gfs2_disk_hash(filename, len); lindex = hash >> (32 - dip->i_di.di_depth); error = get_first_leaf(dip, lindex, &bh_next); if (error) return error; if (bh_next == NULL) return errno; /* Find the entry */ do{ if (bh && bh != dip->i_bh) brelse(bh); bh = bh_next; error = leaf_search(dip, bh, filename, len, dent_out, NULL); switch (error){ case 0: *bh_out = bh; return 0; case -ENOENT: break; default: if (bh && bh != dip->i_bh) brelse(bh); return error; } error = get_next_leaf(dip, bh, &bh_next); } while (!error); if (bh && bh != dip->i_bh) brelse(bh); return error; } /** * dir_e_search - * @dip: The GFS2 inode * @id: * @inode: * * Returns: */ static int dir_e_search(struct gfs2_inode *dip, const char *filename, int len, unsigned int *type, struct gfs2_inum *inum) { struct gfs2_buffer_head *bh = NULL; struct gfs2_dirent *dent; int error; error = linked_leaf_search(dip, filename, len, &dent, &bh); if (error) return error; gfs2_inum_in(inum, (char *)&dent->de_inum); if (type) *type = be16_to_cpu(dent->de_type); brelse(bh); return 0; } /** * dir_l_search - * @dip: The GFS2 inode * @id: * @inode: * * Returns: */ static int dir_l_search(struct gfs2_inode *dip, const char *filename, int len, unsigned int *type, struct gfs2_inum *inum) { struct gfs2_dirent *dent; int error; if(!inode_is_stuffed(dip)) return -1; error = leaf_search(dip, dip->i_bh, filename, len, &dent, NULL); if (!error) { gfs2_inum_in(inum, (char *)&dent->de_inum); if(type) *type = be16_to_cpu(dent->de_type); } return error; } /** * dir_search - Search a directory * @dip: The GFS inode * @id * @type: * * This routine searches a directory for a file or another directory * given its filename. 
The component of the identifier that is * not being used to search will be filled in and must be freed by * the caller. * * Returns: 0 if found, -1 on failure, -ENOENT if not found. */ int dir_search(struct gfs2_inode *dip, const char *filename, int len, unsigned int *type, struct gfs2_inum *inum) { int error; if(!S_ISDIR(dip->i_di.di_mode) && !is_gfs_dir(&dip->i_di)) return -1; if (dip->i_di.di_flags & GFS2_DIF_EXHASH) error = dir_e_search(dip, filename, len, type, inum); else error = dir_l_search(dip, filename, len, type, inum); return error; } static int dir_e_del(struct gfs2_inode *dip, const char *filename, int len) { int lindex; int error; int found = 0; uint64_t leaf_no; struct gfs2_buffer_head *bh = NULL; struct gfs2_dirent *cur, *prev; lindex = (1 << (dip->i_di.di_depth))-1; for(; (lindex >= 0) && !found; lindex--){ gfs2_get_leaf_nr(dip, lindex, &leaf_no); while(leaf_no && !found){ bh = bread(dip->i_sbd, leaf_no); error = leaf_search(dip, bh, filename, len, &cur, &prev); if (error) { if(error != -ENOENT){ brelse(bh); return -1; } leaf_no = be64_to_cpu(((struct gfs2_leaf *)bh->b_data)->lf_next); brelse(bh); } else found = 1; } } if(!found) return 1; if (bh) { dirent2_del(dip, bh, prev, cur); brelse(bh); } return 0; } static int dir_l_del(struct gfs2_inode *dip, const char *filename, int len) { int error=0; struct gfs2_dirent *cur, *prev; if(!inode_is_stuffed(dip)) return -1; error = leaf_search(dip, dip->i_bh, filename, len, &cur, &prev); if (error) { if (error == -ENOENT) return 1; else return -1; } dirent2_del(dip, dip->i_bh, prev, cur); return 0; } /* * gfs2_dirent_del * @dip * filename * * Delete a directory entry from a directory. This _only_ * removes the directory entry - leaving the dinode in * place. (Likely without a link.) 
* * Returns: 0 on success (or if it doesn't already exist), -1 on failure */ int gfs2_dirent_del(struct gfs2_inode *dip, const char *filename, int len) { int error; if(!S_ISDIR(dip->i_di.di_mode) && !is_gfs_dir(&dip->i_di)) return -1; if (dip->i_di.di_flags & GFS2_DIF_EXHASH) error = dir_e_del(dip, filename, len); else error = dir_l_del(dip, filename, len); bmodified(dip->i_bh); return error; } /** * gfs2_lookupi - Look up a filename in a directory and return its inode * @dip: The directory to search * @name: The name of the inode to look for * @ipp: Used to return the found inode if any * * Returns: 0 on success, -EXXXX on failure */ int gfs2_lookupi(struct gfs2_inode *dip, const char *filename, int len, struct gfs2_inode **ipp) { struct gfs2_sbd *sdp = dip->i_sbd; int error = 0; struct gfs2_inum inum; *ipp = NULL; if (!len || len > GFS2_FNAMESIZE) return -ENAMETOOLONG; if (gfs2_filecmp(filename, (char *)".", 1)) { *ipp = dip; return 0; } error = dir_search(dip, filename, len, NULL, &inum); if (!error) *ipp = lgfs2_inode_read(sdp, inum.no_addr); return error; } /** * gfs2_free_block - free up a block given its block number */ void gfs2_free_block(struct gfs2_sbd *sdp, uint64_t block) { struct rgrp_tree *rgd; /* Adjust the free space count for the freed block */ rgd = gfs2_blk2rgrpd(sdp, block); /* find the rg for indir block */ if (rgd) { gfs2_set_bitmap(sdp, block, GFS2_BLKST_FREE); rgd->rg.rg_free++; /* adjust the free count */ if (sdp->gfs1) gfs_rgrp_out((struct gfs_rgrp *)&rgd->rg, rgd->bh[0]); else gfs2_rgrp_out_bh(&rgd->rg, rgd->bh[0]); sdp->blks_alloced--; } } /** * gfs2_freedi - unlink a disk inode by block number. * Note: currently only works for regular files. 
*/ int gfs2_freedi(struct gfs2_sbd *sdp, uint64_t diblock) { struct gfs2_inode *ip; struct gfs2_buffer_head *bh, *nbh; int h, head_size; uint64_t *ptr, block; struct rgrp_tree *rgd; uint32_t height; osi_list_t metalist[GFS2_MAX_META_HEIGHT]; osi_list_t *cur_list, *next_list, *tmp; for (h = 0; h < GFS2_MAX_META_HEIGHT; h++) osi_list_init(&metalist[h]); bh = bread(sdp, diblock); if (bh == NULL) return -1; ip = lgfs2_inode_get(sdp, bh); if (ip == NULL) return -1; height = ip->i_di.di_height; osi_list_add(&bh->b_altlist, &metalist[0]); for (h = 0; h < height; h++){ cur_list = &metalist[h]; next_list = &metalist[h + 1]; head_size = (h > 0 ? sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode)); for (tmp = cur_list->next; tmp != cur_list; tmp = tmp->next){ bh = osi_list_entry(tmp, struct gfs2_buffer_head, b_altlist); for (ptr = (uint64_t *)(bh->b_data + head_size); (char *)ptr < (bh->b_data + sdp->bsize); ptr++) { if (!*ptr) continue; block = be64_to_cpu(*ptr); gfs2_free_block(sdp, block); if (h == height - 1) /* if not metadata */ continue; /* don't queue it up */ /* Read the next metadata block in the chain */ nbh = bread(sdp, block); osi_list_add(&nbh->b_altlist, next_list); brelse(nbh); } } } /* Set the bitmap type for inode to free space: */ gfs2_set_bitmap(sdp, ip->i_di.di_num.no_addr, GFS2_BLKST_FREE); inode_put(&ip); /* inode_put deallocated the extra block used by the disk inode, */ /* so adjust it in the superblock struct */ sdp->blks_alloced--; /* Now we have to adjust the rg freespace count and inode count: */ rgd = gfs2_blk2rgrpd(sdp, diblock); rgd->rg.rg_free++; rgd->rg.rg_dinodes--; /* one less inode in use */ if (sdp->gfs1) gfs_rgrp_out((struct gfs_rgrp *)&rgd->rg, rgd->bh[0]); else gfs2_rgrp_out_bh(&rgd->rg, rgd->bh[0]); sdp->dinodes_alloced--; return 0; } gfs2-utils/gfs2/libgfs2/gfs1.c0000664000175000017500000002512712111707433014732 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include 
#include #include #include #include #include #include "osi_list.h" #include "libgfs2.h" /* GFS1 compatibility functions - so that programs like gfs2_convert and gfs2_edit can examine/manipulate GFS1 file systems. */ static __inline__ int fs_is_jdata(struct gfs2_inode *ip) { return ip->i_di.di_flags & GFS2_DIF_JDATA; } static __inline__ uint64_t * gfs1_metapointer(struct gfs2_buffer_head *bh, unsigned int height, struct metapath *mp) { unsigned int head_size = (height > 0) ? sizeof(struct gfs_indirect) : sizeof(struct gfs_dinode); return ((uint64_t *)(bh->b_data + head_size)) + mp->mp_list[height]; } int is_gfs_dir(struct gfs2_dinode *dinode) { if (dinode->__pad1 == GFS_FILE_DIR) return 1; return 0; } void gfs1_lookup_block(struct gfs2_inode *ip, struct gfs2_buffer_head *bh, unsigned int height, struct metapath *mp, int create, int *new, uint64_t *block) { uint64_t *ptr = gfs1_metapointer(bh, height, mp); if (*ptr) { *block = be64_to_cpu(*ptr); return; } *block = 0; if (!create) return; if (height == ip->i_di.di_height - 1&& !(S_ISDIR(ip->i_di.di_mode))) *block = data_alloc(ip); else *block = meta_alloc(ip); *ptr = cpu_to_be64(*block); bmodified(bh); ip->i_di.di_blocks++; bmodified(ip->i_bh); *new = 1; } void gfs1_block_map(struct gfs2_inode *ip, uint64_t lblock, int *new, uint64_t *dblock, uint32_t *extlen, int prealloc) { struct gfs2_sbd *sdp = ip->i_sbd; struct gfs2_buffer_head *bh; struct metapath *mp; int create = *new; unsigned int bsize; unsigned int height; unsigned int end_of_metadata; unsigned int x; *new = 0; *dblock = 0; if (extlen) *extlen = 0; if (!ip->i_di.di_height) { /* stuffed */ if (!lblock) { *dblock = ip->i_di.di_num.no_addr; if (extlen) *extlen = 1; } return; } bsize = (fs_is_jdata(ip)) ? 
sdp->sd_jbsize : sdp->bsize;

	/*
	 * If reaching lblock needs a taller tree than the inode has, give
	 * up unless the caller asked for creation.
	 */
	height = calc_tree_height(ip, (lblock + 1) * bsize);
	if (ip->i_di.di_height < height) {
		if (!create)
			return;

		build_height(ip, height);
	}

	mp = find_metapath(ip, lblock);
	end_of_metadata = ip->i_di.di_height - 1;

	bh = ip->i_bh;

	/* Descend through the indirect levels above the final one. */
	for (x = 0; x < end_of_metadata; x++) {
		gfs1_lookup_block(ip, bh, x, mp, create, new, dblock);
		if (bh != ip->i_bh)
			brelse(bh);
		if (!*dblock)
			goto out;

		if (*new) {
			/* Freshly allocated indirect block: stamp its header. */
			struct gfs2_meta_header mh;

			bh = bget(sdp, *dblock);
			mh.mh_magic = GFS2_MAGIC;
			mh.mh_type = GFS2_METATYPE_IN;
			mh.mh_format = GFS2_FORMAT_IN;
			gfs2_meta_header_out_bh(&mh, bh);
		} else {
			if (*dblock == ip->i_di.di_num.no_addr)
				bh = ip->i_bh;
			else
				bh = bread(sdp, *dblock);
		}
	}

	/* With prealloc set the final-level pointer is not looked up. */
	if (!prealloc)
		gfs1_lookup_block(ip, bh, end_of_metadata, mp, create, new,
				  dblock);

	if (extlen && *dblock) {
		*extlen = 1;

		/*
		 * For an existing block, count how many following pointers
		 * in this buffer refer to consecutive disk blocks.
		 */
		if (!*new) {
			uint64_t tmp_dblock;
			int tmp_new;
			unsigned int nptrs;

			nptrs = (end_of_metadata) ? sdp->sd_inptrs : sdp->sd_diptrs;

			while (++mp->mp_list[end_of_metadata] < nptrs) {
				gfs1_lookup_block(ip, bh, end_of_metadata, mp,
						  FALSE, &tmp_new,
						  &tmp_dblock);

				if (*dblock + *extlen != tmp_dblock)
					break;

				(*extlen)++;
			}
		}
	}

	if (bh != ip->i_bh)
		brelse(bh);
 out:
	free(mp);
}

/*
 * gfs1_writei - write @size bytes from @buf into inode @ip at @offset.
 * A stuffed inode is unstuffed when the data would not fit after the
 * dinode header.  The inode size is raised when the write extends the
 * file; mtime and ctime are set to the current time.
 * Returns the number of bytes copied, 0 when @size is 0, or -1 when a
 * block cannot be mapped.
 */
int gfs1_writei(struct gfs2_inode *ip, char *buf, uint64_t offset,
		unsigned int size)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_buffer_head *bh;
	uint64_t lblock, dblock;
	uint32_t extlen = 0;
	unsigned int amount;
	int new;
	int journaled = fs_is_jdata(ip);
	const uint64_t start = offset;
	int copied = 0;

	if (!size)
		return 0;

	if (!ip->i_di.di_height && /* stuffed */
	    ((start + size) > (sdp->bsize - sizeof(struct gfs_dinode))))
		unstuff_dinode(ip);

	/* Split the byte offset into a logical block and an in-block offset. */
	if (journaled) {
		lblock = offset / sdp->sd_jbsize;
		offset %= sdp->sd_jbsize;
	} else {
		lblock = offset >> sdp->sd_sb.sb_bsize_shift;
		offset &= sdp->bsize - 1;
	}

	/* Skip whatever header precedes the payload in the first block. */
	if (!ip->i_di.di_height) /* stuffed */
		offset += sizeof(struct gfs_dinode);
	else if (journaled)
		offset += sizeof(struct gfs2_meta_header);

	while (copied < size) {
		amount = size - copied;
		if (amount >
sdp->bsize - offset)
			amount = sdp->bsize - offset;

		/* Map a new extent once the previous one is used up. */
		if (!extlen){
			new = TRUE;
			gfs1_block_map(ip, lblock, &new, &dblock, &extlen, 0);
			if (!dblock)
				return -1;
		}

		if (dblock == ip->i_di.di_num.no_addr)
			bh = ip->i_bh;
		else
			bh = bread(sdp, dblock);

		/* Journaled data blocks other than the dinode get a JD header. */
		if (journaled && dblock != ip->i_di.di_num.no_addr ) {
			struct gfs2_meta_header mh;

			mh.mh_magic = GFS2_MAGIC;
			mh.mh_type = GFS2_METATYPE_JD;
			mh.mh_format = GFS2_FORMAT_JD;
			gfs2_meta_header_out_bh(&mh, bh);
		}

		memcpy(bh->b_data + offset, buf + copied, amount);
		bmodified(bh);
		if (bh != ip->i_bh)
			brelse(bh);

		copied += amount;
		lblock++;
		dblock++;
		extlen--;

		/* Later blocks start right after their header, if any. */
		offset = (journaled) ? sizeof(struct gfs2_meta_header) : 0;
	}

	if (ip->i_di.di_size < start + copied) {
		bmodified(ip->i_bh);
		ip->i_di.di_size = start + copied;
	}
	ip->i_di.di_mtime = ip->i_di.di_ctime = time(NULL);

	gfs2_dinode_out(&ip->i_di, ip->i_bh);

	return copied;
}

/* ------------------------------------------------------------------------ */
/* gfs_dinode_in - decode the on-disk (big-endian) GFS1 dinode held in bh   */
/*                 into the CPU-order structure di, field by field.         */
/* ------------------------------------------------------------------------ */
static void gfs_dinode_in(struct gfs_dinode *di, struct gfs2_buffer_head *bh)
{
	struct gfs_dinode *str = (struct gfs_dinode *)bh->b_data;

	gfs2_meta_header_in(&di->di_header, bh);
	gfs2_inum_in(&di->di_num, (char *)&str->di_num);

	di->di_mode = be32_to_cpu(str->di_mode);
	di->di_uid = be32_to_cpu(str->di_uid);
	di->di_gid = be32_to_cpu(str->di_gid);
	di->di_nlink = be32_to_cpu(str->di_nlink);
	di->di_size = be64_to_cpu(str->di_size);
	di->di_blocks = be64_to_cpu(str->di_blocks);
	di->di_atime = be64_to_cpu(str->di_atime);
	di->di_mtime = be64_to_cpu(str->di_mtime);
	di->di_ctime = be64_to_cpu(str->di_ctime);
	di->di_major = be32_to_cpu(str->di_major);
	di->di_minor = be32_to_cpu(str->di_minor);
	di->di_goal_dblk = be64_to_cpu(str->di_goal_dblk);
	di->di_goal_mblk = be64_to_cpu(str->di_goal_mblk);
	di->di_flags = be32_to_cpu(str->di_flags);
	di->di_payload_format = be32_to_cpu(str->di_payload_format);
	di->di_type = be16_to_cpu(str->di_type);
	di->di_height =
be16_to_cpu(str->di_height); di->di_depth = be16_to_cpu(str->di_depth); di->di_entries = be32_to_cpu(str->di_entries); di->di_eattr = be64_to_cpu(str->di_eattr); } static struct gfs2_inode *__gfs_inode_get(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh, uint64_t di_addr) { struct gfs_dinode gfs1_dinode; struct gfs2_inode *ip; ip = calloc(1, sizeof(struct gfs2_inode)); if (ip == NULL) { return NULL; } ip->bh_owned = 0; if (!bh) { bh = bread(sdp, di_addr); ip->bh_owned = 1; } gfs_dinode_in(&gfs1_dinode, bh); memcpy(&ip->i_di.di_header, &gfs1_dinode.di_header, sizeof(struct gfs2_meta_header)); memcpy(&ip->i_di.di_num, &gfs1_dinode.di_num, sizeof(struct gfs2_inum)); ip->i_di.di_mode = gfs1_dinode.di_mode; ip->i_di.di_uid = gfs1_dinode.di_uid; ip->i_di.di_gid = gfs1_dinode.di_gid; ip->i_di.di_nlink = gfs1_dinode.di_nlink; ip->i_di.di_size = gfs1_dinode.di_size; ip->i_di.di_blocks = gfs1_dinode.di_blocks; ip->i_di.di_atime = gfs1_dinode.di_atime; ip->i_di.di_mtime = gfs1_dinode.di_mtime; ip->i_di.di_ctime = gfs1_dinode.di_ctime; ip->i_di.di_major = gfs1_dinode.di_major; ip->i_di.di_minor = gfs1_dinode.di_minor; ip->i_di.di_goal_data = gfs1_dinode.di_goal_dblk; ip->i_di.di_goal_meta = gfs1_dinode.di_goal_mblk; ip->i_di.di_flags = gfs1_dinode.di_flags; ip->i_di.di_payload_format = gfs1_dinode.di_payload_format; ip->i_di.__pad1 = gfs1_dinode.di_type; ip->i_di.di_height = gfs1_dinode.di_height; ip->i_di.di_depth = gfs1_dinode.di_depth; ip->i_di.di_entries = gfs1_dinode.di_entries; ip->i_di.di_eattr = gfs1_dinode.di_eattr; ip->i_bh = bh; ip->i_sbd = sdp; return ip; } struct gfs2_inode *lgfs2_gfs_inode_get(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh) { return __gfs_inode_get(sdp, bh, 0); } struct gfs2_inode *lgfs2_gfs_inode_read(struct gfs2_sbd *sdp, uint64_t di_addr) { return __gfs_inode_get(sdp, NULL, di_addr); } /* ------------------------------------------------------------------------ */ /* gfs_jindex_in - read in a gfs1 jindex structure. 
*/ /* ------------------------------------------------------------------------ */ void gfs_jindex_in(struct gfs_jindex *jindex, char *jbuf) { struct gfs_jindex *str = (struct gfs_jindex *) jbuf; jindex->ji_addr = be64_to_cpu(str->ji_addr); jindex->ji_nsegment = be32_to_cpu(str->ji_nsegment); jindex->ji_pad = be32_to_cpu(str->ji_pad); memcpy(jindex->ji_reserved, str->ji_reserved, 64); } /* ------------------------------------------------------------------------ */ /* gfs_rgrp_in - Read in a resource group header */ /* ------------------------------------------------------------------------ */ void gfs_rgrp_in(struct gfs_rgrp *rgrp, struct gfs2_buffer_head *rbh) { struct gfs_rgrp *str = (struct gfs_rgrp *)rbh->b_data; gfs2_meta_header_in(&rgrp->rg_header, rbh); rgrp->rg_flags = be32_to_cpu(str->rg_flags); rgrp->rg_free = be32_to_cpu(str->rg_free); rgrp->rg_useddi = be32_to_cpu(str->rg_useddi); rgrp->rg_freedi = be32_to_cpu(str->rg_freedi); gfs2_inum_in(&rgrp->rg_freedi_list, (char *)&str->rg_freedi_list); rgrp->rg_usedmeta = be32_to_cpu(str->rg_usedmeta); rgrp->rg_freemeta = be32_to_cpu(str->rg_freemeta); memcpy(rgrp->rg_reserved, str->rg_reserved, 64); } /* ------------------------------------------------------------------------ */ /* gfs_rgrp_out */ /* ------------------------------------------------------------------------ */ void gfs_rgrp_out(struct gfs_rgrp *rgrp, struct gfs2_buffer_head *rbh) { struct gfs_rgrp *str = (struct gfs_rgrp *)rbh->b_data; gfs2_meta_header_out_bh(&rgrp->rg_header, rbh); str->rg_flags = cpu_to_be32(rgrp->rg_flags); str->rg_free = cpu_to_be32(rgrp->rg_free); str->rg_useddi = cpu_to_be32(rgrp->rg_useddi); str->rg_freedi = cpu_to_be32(rgrp->rg_freedi); gfs2_inum_out(&rgrp->rg_freedi_list, (char *)&str->rg_freedi_list); str->rg_usedmeta = cpu_to_be32(rgrp->rg_usedmeta); str->rg_freemeta = cpu_to_be32(rgrp->rg_freemeta); memcpy(str->rg_reserved, rgrp->rg_reserved, 64); bmodified(rbh); } void gfs_put_leaf_nr(struct gfs2_inode *dip, uint32_t 
/* Lookup table for the reflected CRC-32 polynomial 0xedb88320 */
static const uint32_t crc_32_tab[] = {
	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
	0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
	0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
	0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
	0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
	0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
	0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
	0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
	0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
	0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
	0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
	0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
	0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
	0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
	0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
	0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
	0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
	0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
	0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
	0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
	0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
	0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
	0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
	0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
	0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
	0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
	0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
	0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
	0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
	0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
	0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
	0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
	0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
	0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
	0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
	0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
	0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
	0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
	0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
	0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
	0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
	0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};

/**
 * gfs2_disk_hash - hash an array of data
 * @data: the data to be hashed
 * @len: the length of data to be hashed
 *
 * This function must produce the same results as the one in the kernel:
 * crc32_le(0xFFFFFFFF, data, len) ^ 0xFFFFFFFF
 *
 * The hash is a 32-bit CRC of the data, computed one byte at a time with
 * the crc_32_tab table above.
 *
 * A @len of zero or less hashes no bytes and yields 0; a negative length
 * is never used to walk @data.
 *
 * Returns: the hash
 */
uint32_t gfs2_disk_hash(const char *data, int len)
{
	uint32_t hash = 0xFFFFFFFF;

	for (; len > 0; len--, data++) {
		/* Read the byte as unsigned so the table index never depends
		   on the signedness of plain char */
		const unsigned char byte = (unsigned char)*data;

		hash = crc_32_tab[(hash ^ byte) & 0xFF] ^ (hash >> 8);
	}

	return ~hash;
}
print_fields(const char *name) { const struct lgfs2_metadata *m = lgfs2_find_mtype_name(name, LGFS2_MD_GFS1|LGFS2_MD_GFS2); if (m != NULL) { const struct lgfs2_metafield *fields = m->fields; const unsigned nfields = m->nfields; int i; for (i = 0; i < nfields; i++) printf("0x%.4x %s\n", fields[i].offset, fields[i].name); } } static int getopts(int argc, char *argv[], struct cmdopts *opts) { int opt; opts->src = stdin; while ((opt = getopt(argc, argv, "F:f:hT")) != -1) { switch (opt) { case 'f': if (strcmp("-", optarg)) { opts->src = fopen(optarg, "r"); if (opts->src == NULL) { perror("Failed to open source file"); return 1; } } break; case 'T': print_structs(); exit(0); case 'F': print_fields(optarg); exit(0); case 'h': opts->help = 1; return 0; default: fprintf(stderr, "Use -h for help\n"); return 1; } } if (argc - optind != 1) { usage(argv[0]); fprintf(stderr, "Missing file system path. Use -h for help.\n"); return 1; } opts->fspath = strdup(argv[optind]); if (opts->fspath == NULL) { perror("getopts"); return 1; } return 0; } static struct gfs2_sbd *openfs(const char *path) { int fd; int ret; int sane; int count; struct gfs2_sbd *sdp = calloc(1, sizeof(struct gfs2_sbd)); if (sdp == NULL) { perror("calloc"); return NULL; } fd = open(path, O_RDWR); if (fd < 0) { fprintf(stderr, "Failed to open %s\n", path); free(sdp); return NULL; } memset(sdp, 0, sizeof(*sdp)); sdp->bsize = GFS2_BASIC_BLOCK; sdp->device_fd = fd; compute_constants(sdp); lgfs2_get_dev_info(fd, &sdp->dinfo); fix_device_geometry(sdp); ret = read_sb(sdp); if (ret != 0) { perror("Could not read sb"); return NULL; } sdp->master_dir = lgfs2_inode_read(sdp, sdp->sd_sb.sb_master_dir.no_addr); gfs2_lookupi(sdp->master_dir, "rindex", 6, &sdp->md.riinode); sdp->fssize = sdp->device.length; if (sdp->md.riinode) { rindex_read(sdp, 0, &count, &sane); } else { perror("Failed to look up rindex"); free(sdp); return NULL; } return sdp; } int main(int argc, char *argv[]) { int ret; struct cmdopts opts = {NULL, NULL}; 
struct gfs2_sbd *sdp; struct lgfs2_lang_result *result; struct lgfs2_lang_state *state; if (getopts(argc, argv, &opts)) { exit(1); } if (opts.help) { usage(argv[0]); exit(0); } sdp = openfs(argv[optind]); if (sdp == NULL) { exit(1); } state = lgfs2_lang_init(); if (state == NULL) { perror("lgfs2_lang_init failed"); exit(1); } ret = lgfs2_lang_parsef(state, opts.src); if (ret != 0) { fprintf(stderr, "Parse failed\n"); return ret; } for (result = lgfs2_lang_result_next(state, sdp); result != NULL; result = lgfs2_lang_result_next(state, sdp)) { if (result == NULL) { fprintf(stderr, "Failed to interpret script\n"); return -1; } lgfs2_lang_result_print(result); lgfs2_lang_result_free(&result); } gfs2_rgrp_free(&sdp->rgtree); inode_put(&sdp->md.riinode); inode_put(&sdp->master_dir); lgfs2_lang_free(&state); free(opts.fspath); return 0; } // libgfs2 still requires an external print_it function void print_it(const char *label, const char *fmt, const char *fmt2, ...) { return; } gfs2-utils/gfs2/libgfs2/lang.c0000664000175000017500000004007112165024665015016 0ustar andyandy#include #include #include #include #include #include #include #include #include "lang.h" #include "parser.h" const char* ast_type_string[] = { [AST_NONE] = "NONE", // Statements [AST_ST_SET] = "SET", [AST_ST_GET] = "GET", // Expressions [AST_EX_ID] = "IDENTIFIER", [AST_EX_NUMBER] = "NUMBER", [AST_EX_STRING] = "STRING", [AST_EX_ADDRESS] = "ADDRESS", [AST_EX_PATH] = "PATH", [AST_EX_SUBSCRIPT] = "SUBSCRIPT", [AST_EX_OFFSET] = "OFFSET", [AST_EX_BLOCKSPEC] = "BLOCKSPEC", [AST_EX_STRUCTSPEC] = "STRUCTSPEC", [AST_EX_FIELDSPEC] = "FIELDSPEC", [AST_EX_TYPESPEC] = "TYPESPEC", // Keywords [AST_KW_STATE] = "STATE", }; /** * Initialize an expression node of the given type from a source string. * Currently just converts numerical values and string values where * appropriate. String values are duplicted into newly allocated buffers as the * text from the parser will go away. 
* Returns 0 on success or non-zero with errno set on failure */ static int ast_expr_init(struct ast_node *expr, ast_node_t type, const char *str) { int ret = 0; switch (type) { case AST_EX_OFFSET: str++; // Cut off the + case AST_EX_NUMBER: ret = sscanf(str, "%"SCNi64, &expr->ast_num); if (ret != 1) { return 1; } break; case AST_EX_ID: case AST_EX_PATH: case AST_EX_STRING: expr->ast_str = strdup(str); if (expr->ast_str == NULL) { return 1; } break; case AST_EX_ADDRESS: case AST_EX_SUBSCRIPT: case AST_EX_BLOCKSPEC: case AST_EX_STRUCTSPEC: case AST_EX_FIELDSPEC: case AST_EX_TYPESPEC: case AST_KW_STATE: break; default: errno = EINVAL; return 1; } return 0; } /** * Create a new AST node of a given type from a source string. * Returns a pointer to the new node or NULL on failure with errno set. */ struct ast_node *ast_new(ast_node_t type, const char *text) { struct ast_node *node; node = (struct ast_node *)calloc(1, sizeof(struct ast_node)); if (node == NULL) { goto return_fail; } if (type > _AST_EX_START && ast_expr_init(node, type, text)) { goto return_free; } node->ast_text = strdup(text); if (node->ast_text == NULL) { goto return_free; } node->ast_type = type; return node; return_free: if (node->ast_text) { free(node->ast_text); } if (node->ast_str) { free(node->ast_str); } free(node); return_fail: fprintf(stderr, "Failed to create new value from %s: %s\n", text, strerror(errno)); return NULL; } /** * Free the memory allocated for an AST node and set its pointer to NULL */ void ast_destroy(struct ast_node **node) { if (*node == NULL) { return; } ast_destroy(&(*node)->ast_left); ast_destroy(&(*node)->ast_right); switch((*node)->ast_type) { case AST_EX_ID: case AST_EX_PATH: case AST_EX_STRING: free((*node)->ast_str); break; default: break; } free((*node)->ast_text); free(*node); *node = NULL; } static void ast_string_unescape(char *str) { int head, tail; for (head = tail = 0; str[head] != '\0'; head++, tail++) { if (str[head] == '\\' && str[head+1] != '\0') head++; 
str[tail] = str[head]; } str[tail] = '\0'; } static uint64_t ast_lookup_path(char *path, struct gfs2_sbd *sbd) { int err = 0; char *c; struct gfs2_inode *ip, *iptmp; char *segment; uint64_t bn = 0; segment = strtok_r(path, "/", &c); ip = lgfs2_inode_read(sbd, sbd->sd_sb.sb_root_dir.no_addr); while (ip != NULL) { if (segment == NULL) { // No more segments bn = ip->i_di.di_num.no_addr; inode_put(&ip); return bn; } ast_string_unescape(segment); err = gfs2_lookupi(ip, segment, strlen(segment), &iptmp); inode_put(&ip); if (err != 0) { errno = -err; break; } ip = iptmp; segment = strtok_r(NULL, "/", &c); } return 0; } enum block_id { ID_SB = 0, ID_MASTER, ID_ROOT, ID_RINDEX, ID_END }; /** * Names of blocks which can be uniquely identified in the fs */ static const char *block_ids[] = { [ID_SB] = "sb", [ID_MASTER] = "master", [ID_ROOT] = "root", [ID_RINDEX] = "rindex", [ID_END] = NULL }; static uint64_t ast_lookup_id(const char *id, struct gfs2_sbd *sbd) { uint64_t bn = 0; int i; for (i = 0; i < ID_END; i++) { if (!strcmp(id, block_ids[i])) { break; } } switch (i) { case ID_SB: bn = sbd->sb_addr; break; case ID_MASTER: bn = sbd->sd_sb.sb_master_dir.no_addr; break; case ID_ROOT: bn = sbd->sd_sb.sb_root_dir.no_addr; break; case ID_RINDEX: bn = sbd->md.riinode->i_di.di_num.no_addr; break; default: return 0; } return bn; } static uint64_t ast_lookup_rgrp(uint64_t rgnum, struct gfs2_sbd *sbd) { uint64_t i = rgnum; struct osi_node *n; for (n = osi_first(&sbd->rgtree); n != NULL && i > 0; n = osi_next(n), i--); if (n != NULL && i == 0) return ((struct rgrp_tree *)n)->ri.ri_addr; fprintf(stderr, "Resource group number out of range: %"PRIu64"\n", rgnum); return 0; } static uint64_t ast_lookup_subscript(struct ast_node *id, struct ast_node *index, struct gfs2_sbd *sbd) { uint64_t bn = 0; const char *name = id->ast_str; if (!strcmp(name, "rgrp")) { bn = ast_lookup_rgrp(index->ast_num, sbd); } else { fprintf(stderr, "Unrecognized identifier %s\n", name); } return bn; } /** * Look up 
a block and return its number. The kind of lookup depends on the * type of the ast node. */ static uint64_t ast_lookup_block_num(struct ast_node *ast, struct gfs2_sbd *sbd) { uint64_t bn = 0; switch (ast->ast_type) { case AST_EX_OFFSET: bn = ast_lookup_block_num(ast->ast_left, sbd) + ast->ast_num; break; case AST_EX_ADDRESS: if (gfs2_check_range(sbd, ast->ast_num)) break; bn = ast->ast_num; break; case AST_EX_PATH: bn = ast_lookup_path(ast->ast_str, sbd); break; case AST_EX_ID: bn = ast_lookup_id(ast->ast_str, sbd); break; case AST_EX_SUBSCRIPT: bn = ast_lookup_subscript(ast->ast_left, ast->ast_left->ast_left, sbd); break; default: break; } return bn; } static struct gfs2_buffer_head *ast_lookup_block(struct ast_node *node, struct gfs2_sbd *sbd) { uint64_t bn = ast_lookup_block_num(node, sbd); if (bn == 0) { fprintf(stderr, "Block not found: %s\n", node->ast_text); return NULL; } return bread(sbd, bn); } static const char *bitstate_strings[] = { [GFS2_BLKST_FREE] = "Free", [GFS2_BLKST_USED] = "Used", [GFS2_BLKST_UNLINKED] = "Unlinked", [GFS2_BLKST_DINODE] = "Dinode" }; /** * Print a representation of an arbitrary field of an arbitrary GFS2 block to stdout * Returns 0 if successful, 1 otherwise */ static int field_print(const struct gfs2_buffer_head *bh, const struct lgfs2_metadata *mtype, const struct lgfs2_metafield *field) { const char *fieldp = (char *)bh->iov.iov_base + field->offset; printf("%s\t%"PRIu64"\t%u\t%u\t%s\t", mtype->name, bh->b_blocknr, field->offset, field->length, field->name); if (field->flags & LGFS2_MFF_UUID) { printf("'%s'\n", str_uuid((const unsigned char *)fieldp)); } else if (field->flags & LGFS2_MFF_STRING) { printf("'%s'\n", fieldp); } else { switch(field->length) { case 1: printf("%"PRIu8"\n", *(uint8_t *)fieldp); break; case 2: printf("%"PRIu16"\n", be16_to_cpu(*(uint16_t *)fieldp)); break; case 4: printf("%"PRIu32"\n", be32_to_cpu(*(uint32_t *)fieldp)); break; case 8: printf("%"PRIu64"\n", be64_to_cpu(*(uint64_t *)fieldp)); break; 
default: // "Reserved" field so just print 0 printf("0\n"); return 1; } } return 0; } /** * Print a representation of an arbitrary GFS2 block to stdout */ int lgfs2_lang_result_print(struct lgfs2_lang_result *result) { int i; if (result->lr_mtype != NULL) { for (i = 0; i < result->lr_mtype->nfields; i++) { field_print(result->lr_bh, result->lr_mtype, &result->lr_mtype->fields[i]); } } else { printf("%"PRIu64": %s\n", result->lr_blocknr, bitstate_strings[result->lr_state]); } return 0; } static int ast_get_bitstate(uint64_t bn, struct gfs2_sbd *sbd) { int ret = 0; int state = 0; struct rgrp_tree *rgd = gfs2_blk2rgrpd(sbd, bn); if (rgd == NULL) { fprintf(stderr, "Could not find resource group for block %"PRIu64"\n", bn); return -1; } ret = gfs2_rgrp_read(sbd, rgd); if (ret != 0) { fprintf(stderr, "Failed to read resource group for block %"PRIu64": %d\n", bn, ret); return -1; } state = lgfs2_get_bitmap(sbd, bn, rgd); if (state == -1) { fprintf(stderr, "Failed to acquire bitmap state for block %"PRIu64"\n", bn); return -1; } gfs2_rgrp_relse(rgd); return state; } static const struct lgfs2_metadata *ast_lookup_mtype(const struct gfs2_buffer_head *bh) { const struct lgfs2_metadata *mtype; const uint32_t mh_type = lgfs2_get_block_type(bh); if (mh_type == 0) { fprintf(stderr, "Could not determine type for block %"PRIu64"\n", bh->b_blocknr); return NULL; } mtype = lgfs2_find_mtype(mh_type, bh->sdp->gfs1 ? LGFS2_MD_GFS1 : LGFS2_MD_GFS2); if (mtype == NULL) { fprintf(stderr, "Could not determine meta type for block %"PRIu64"\n", bh->b_blocknr); return NULL; } return mtype; } /** * Interpret the get statement. 
*/ static struct lgfs2_lang_result *ast_interp_get(struct lgfs2_lang_state *state, struct ast_node *ast, struct gfs2_sbd *sbd) { struct lgfs2_lang_result *result = calloc(1, sizeof(struct lgfs2_lang_result)); if (result == NULL) { fprintf(stderr, "Failed to allocate memory for result\n"); return NULL; } if (ast->ast_right->ast_right == NULL) { result->lr_bh = ast_lookup_block(ast->ast_right, sbd); if (result->lr_bh == NULL) { free(result); return NULL; } result->lr_blocknr = result->lr_bh->b_blocknr; result->lr_mtype = ast_lookup_mtype(result->lr_bh); } else if (ast->ast_right->ast_right->ast_type == AST_KW_STATE) { result->lr_blocknr = ast_lookup_block_num(ast->ast_right, sbd); if (result->lr_blocknr == 0) { free(result); return NULL; } result->lr_state = ast_get_bitstate(result->lr_blocknr, sbd); } return result; } /** * Interpret a UUID string by removing hyphens from the string and then * interprets 16 pairs of hex digits as octets. */ static int ast_str_to_uuid(const char *str, uint8_t *uuid) { char s[33]; int head, tail, tmp; for (head = tail = 0; head < strlen(str) && tail < 33; head++) { if (str[head] == '-') continue; s[tail] = tolower(str[head]); if (!((s[tail] >= 'a' && s[tail] <= 'f') || (s[tail] >= '0' && s[tail] <= '9'))) goto invalid; tail++; } if (tail != 32) { goto invalid; } s[tail] = '\0'; for (head = 0; head < 16; head++) { if (sscanf(s+(head*2), "%02x", &tmp) != 1) { goto invalid; } *(uuid + head) = tmp; } return AST_INTERP_SUCCESS; invalid: fprintf(stderr, "Invalid UUID\n"); return AST_INTERP_INVAL; } /** * Set a field of a gfs2 block of a given type to a given value. * Returns AST_INTERP_* to signal success, an invalid field/value or an error. 
*/ static int ast_field_set(struct gfs2_buffer_head *bh, const struct lgfs2_metafield *field, struct ast_node *val) { char *fieldp = (char *)bh->iov.iov_base + field->offset; if (field->flags & LGFS2_MFF_UUID) { uint8_t uuid[16]; int ret = ast_str_to_uuid(val->ast_str, uuid); if (ret != AST_INTERP_SUCCESS) return ret; memcpy(fieldp, uuid, 16); bmodified(bh); return AST_INTERP_SUCCESS; } if ((field->flags & LGFS2_MFF_STRING) && strlen(val->ast_str) > field->length) { fprintf(stderr, "String '%s' is too long for field '%s'\n", val->ast_str, field->name); return AST_INTERP_INVAL; } if (field->flags & (LGFS2_MFF_STRING|LGFS2_MFF_UUID)) { strncpy(fieldp, val->ast_str, field->length - 1); fieldp[field->length - 1] = '\0'; bmodified(bh); return AST_INTERP_SUCCESS; } else { // Numeric fields switch(field->length) { case 1: if (val->ast_num > UINT8_MAX) break; *fieldp = (uint8_t)val->ast_num; bmodified(bh); return AST_INTERP_SUCCESS; case 2: if (val->ast_num > UINT16_MAX) break; *(uint16_t *)fieldp = cpu_to_be16((uint16_t)val->ast_num); bmodified(bh); return AST_INTERP_SUCCESS; case 4: if (val->ast_num > UINT32_MAX) break; *(uint32_t *)fieldp = cpu_to_be32((uint32_t)val->ast_num); bmodified(bh); return AST_INTERP_SUCCESS; case 8: *(uint64_t *)fieldp = cpu_to_be64((uint64_t)val->ast_num); bmodified(bh); return AST_INTERP_SUCCESS; default: // This should never happen return AST_INTERP_ERR; } } fprintf(stderr, "Invalid field assignment: %s (size %d) = %s\n", field->name, field->length, val->ast_text); return AST_INTERP_INVAL; } static const struct lgfs2_metadata *lang_find_mtype(struct ast_node *node, struct gfs2_buffer_head *bh, unsigned ver) { const struct lgfs2_metadata *mtype = NULL; if (node->ast_type == AST_EX_TYPESPEC) { mtype = lgfs2_find_mtype_name(node->ast_str, ver); if (mtype == NULL) fprintf(stderr, "Invalid block type: %s\n", node->ast_text); } else { mtype = lgfs2_find_mtype(lgfs2_get_block_type(bh), ver); if (mtype == NULL) fprintf(stderr, "Unrecognised block 
at: %s\n", node->ast_text); } return mtype; } /** * Interpret an assignment (set) */ static struct lgfs2_lang_result *ast_interp_set(struct lgfs2_lang_state *state, struct ast_node *ast, struct gfs2_sbd *sbd) { struct ast_node *lookup = ast->ast_right; struct ast_node *fieldspec; struct ast_node *fieldname; struct ast_node *fieldval; int i = 0; int ret = 0; unsigned ver = sbd->gfs1 ? LGFS2_MD_GFS1 : LGFS2_MD_GFS2; struct lgfs2_lang_result *result = calloc(1, sizeof(struct lgfs2_lang_result)); if (result == NULL) { fprintf(stderr, "Failed to allocate memory for result\n"); return NULL; } result->lr_bh = ast_lookup_block(lookup, sbd); if (result->lr_bh == NULL) { goto out_err; } result->lr_mtype = lang_find_mtype(lookup->ast_right, result->lr_bh, ver); if (result->lr_mtype == NULL) { fprintf(stderr, "Unrecognised block at: %s\n", lookup->ast_str); goto out_err; } if (lookup->ast_right->ast_type == AST_EX_TYPESPEC) { struct gfs2_meta_header mh = { .mh_magic = GFS2_MAGIC, .mh_type = result->lr_mtype->mh_type, .mh_format = result->lr_mtype->mh_format, }; gfs2_meta_header_out(&mh, result->lr_bh->iov.iov_base); lookup = lookup->ast_right; } for (fieldspec = lookup->ast_right; fieldspec != NULL && fieldspec->ast_type == AST_EX_FIELDSPEC; fieldspec = fieldspec->ast_left) { fieldname = fieldspec->ast_right; fieldval = fieldname->ast_right; for (i = 0; i < result->lr_mtype->nfields; i++) { if (!strcmp(result->lr_mtype->fields[i].name, fieldname->ast_str)) { ret = ast_field_set(result->lr_bh, &result->lr_mtype->fields[i], fieldval); if (ret != AST_INTERP_SUCCESS) { goto out_err; } break; } } } ret = bwrite(result->lr_bh); if (ret != 0) { fprintf(stderr, "Failed to write modified block %"PRIu64": %s\n", result->lr_bh->b_blocknr, strerror(errno)); goto out_err; } return result; out_err: lgfs2_lang_result_free(&result); return NULL; } static struct lgfs2_lang_result *ast_interpret_node(struct lgfs2_lang_state *state, struct ast_node *ast, struct gfs2_sbd *sbd) { struct 
lgfs2_lang_result *result = NULL; if (ast->ast_type == AST_ST_SET) { result = ast_interp_set(state, ast, sbd); } else if (ast->ast_type == AST_ST_GET) { result = ast_interp_get(state, ast, sbd); } else { fprintf(stderr, "Invalid AST node type: %d\n", ast->ast_type); } return result; } struct lgfs2_lang_result *lgfs2_lang_result_next(struct lgfs2_lang_state *state, struct gfs2_sbd *sbd) { struct lgfs2_lang_result *result; if (state->ls_interp_curr == NULL) { return NULL; } result = ast_interpret_node(state, state->ls_interp_curr, sbd); if (result == NULL) { return NULL; } state->ls_interp_curr = state->ls_interp_curr->ast_left; return result; } void lgfs2_lang_result_free(struct lgfs2_lang_result **result) { if (*result == NULL) { fprintf(stderr, "Warning: attempted to free a null result\n"); return; } if ((*result)->lr_mtype != NULL) { (*result)->lr_bh->b_modified = 0; brelse((*result)->lr_bh); (*result)->lr_bh = NULL; } free(*result); *result = NULL; } gfs2-utils/gfs2/libgfs2/lang.h0000664000175000017500000000215512154127655015025 0ustar andyandy#ifndef LANG_H #define LANG_H #include #include "libgfs2.h" struct lgfs2_lang_state { int ls_colnum; int ls_linenum; int ls_errnum; struct ast_node *ls_ast_root; struct ast_node *ls_ast_tail; struct ast_node *ls_interp_curr; }; typedef enum { AST_NONE, // Statements AST_ST_SET, AST_ST_GET, _AST_EX_START, // Expressions AST_EX_ID, AST_EX_NUMBER, AST_EX_STRING, AST_EX_ADDRESS, AST_EX_PATH, AST_EX_SUBSCRIPT, AST_EX_OFFSET, AST_EX_BLOCKSPEC, AST_EX_STRUCTSPEC, AST_EX_FIELDSPEC, AST_EX_TYPESPEC, // Keywords AST_KW_STATE, } ast_node_t; enum { AST_INTERP_SUCCESS = 0, // Success AST_INTERP_FAIL = 1, // Failure AST_INTERP_INVAL = 2, // Invalid field/type mismatch AST_INTERP_ERR = 3, // Something went wrong, see errno }; extern const char* ast_type_string[]; struct ast_node { ast_node_t ast_type; struct ast_node *ast_left; struct ast_node *ast_right; char *ast_text; char *ast_str; uint64_t ast_num; }; extern struct ast_node 
*ast_new(ast_node_t type, const char *text); extern void ast_destroy(struct ast_node **val); #define YYSTYPE struct ast_node * #endif /* LANG_H */ gfs2-utils/gfs2/libgfs2/lexer.l0000664000175000017500000000347612154127655015236 0ustar andyandy%{ #include "lang.h" #include "parser.h" #define EXTRA ((struct lgfs2_lang_state *)yyextra) #define P(token, type, text) do {\ *(yylval) = ast_new(type, text);\ if (*(yylval) == NULL) {\ EXTRA->ls_errnum = errno;\ return 1;\ }\ return (TOK_##token);\ } while(0) #define COLNUM EXTRA->ls_colnum #define YY_USER_ACTION COLNUM += yyleng; %} %option bison-bridge reentrant %option warn debug %option nounput noinput %option noyywrap %option extra-type="struct lgfs2_lang_state *" letter [a-zA-Z_] decdigit [0-9] decnumber -?{decdigit}+ hexdigit [0-9a-fA-F] hexnumber -?0x{hexdigit}+ number ({decnumber}|{hexnumber}) offset \+{number} id {letter}({letter}|{decdigit}|\.)* string \'([^\']|\\\')*\' path \'\/([^\']|\\\')*\' ccomment \/\/.*\n shcomment \#.*\n comment ({ccomment}|{shcomment}) whitespace [ \t\r]+ %% \{ { return TOK_LBRACE; } \} { return TOK_RBRACE; } \[ { return TOK_LBRACKET; } \] { P(RBRACKET, AST_EX_SUBSCRIPT, "[ ]"); } \, { return TOK_COMMA; } \: { P(COLON, AST_EX_FIELDSPEC, yytext); } \; { return TOK_SEMI; } set { P(SET, AST_ST_SET, yytext); } get { P(GET, AST_ST_GET, yytext); } state { P(STATE, AST_KW_STATE, yytext); } {path} { yytext[yyleng-1] = '\0'; P(PATH, AST_EX_PATH, yytext + 1); } {string} { yytext[yyleng-1] = '\0'; P(STRING, AST_EX_STRING, yytext + 1); } {offset} { P(OFFSET, AST_EX_OFFSET, yytext); } {number} { P(NUMBER, AST_EX_NUMBER, yytext); } {id} { P(ID, AST_EX_ID, yytext); } {comment} { COLNUM = 0; EXTRA->ls_linenum++; } <> { return 0; } \n { COLNUM = 0; EXTRA->ls_linenum++; } {whitespace} ; . 
{ printf("Unexpected character '%s' on line %d column %d\n", yytext, yylineno, COLNUM); return 1; } %% gfs2-utils/gfs2/libgfs2/libgfs2.h0000664000175000017500000007522412164515756015447 0ustar andyandy#ifndef __LIBGFS2_DOT_H__ #define __LIBGFS2_DOT_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include "osi_list.h" #include "osi_tree.h" __BEGIN_DECLS #ifndef TRUE #define TRUE (1) #endif #ifndef FALSE #define FALSE (0) #endif #if __BYTE_ORDER == __BIG_ENDIAN #define be16_to_cpu(x) (x) #define be32_to_cpu(x) (x) #define be64_to_cpu(x) (x) #define cpu_to_be16(x) (x) #define cpu_to_be32(x) (x) #define cpu_to_be64(x) (x) #define le16_to_cpu(x) (bswap_16((x))) #define le32_to_cpu(x) (bswap_32((x))) #define le64_to_cpu(x) (bswap_64((x))) #define cpu_to_le16(x) (bswap_16((x))) #define cpu_to_le32(x) (bswap_32((x))) #define cpu_to_le64(x) (bswap_64((x))) #endif /* __BYTE_ORDER == __BIG_ENDIAN */ #if __BYTE_ORDER == __LITTLE_ENDIAN #define be16_to_cpu(x) (bswap_16((x))) #define be32_to_cpu(x) (bswap_32((x))) #define be64_to_cpu(x) (bswap_64((x))) #define cpu_to_be16(x) (bswap_16((x))) #define cpu_to_be32(x) (bswap_32((x))) #define cpu_to_be64(x) (bswap_64((x))) #define le16_to_cpu(x) (x) #define le32_to_cpu(x) (x) #define le64_to_cpu(x) (x) #define cpu_to_le16(x) (x) #define cpu_to_le32(x) (x) #define cpu_to_le64(x) (x) #endif /* __BYTE_ORDER == __LITTLE_ENDIAN */ #define BLOCKMAP_SIZE4(size) (size >> 1) #define BLOCKMAP_BYTE_OFFSET4(x) ((x & 0x0000000000000001) << 2) #define BLOCKMAP_MASK4 (0xf) enum lgfs2_meta_type { LGFS2_MT_GFS2_SB = 0, LGFS2_MT_GFS_SB = 1, LGFS2_MT_RINDEX = 2, LGFS2_MT_GFS2_RGRP = 3, LGFS2_MT_GFS_RGRP = 4, LGFS2_MT_RGRP_BITMAP = 5, LGFS2_MT_GFS2_DINODE = 6, LGFS2_MT_GFS_DINODE = 7, LGFS2_MT_GFS2_INDIRECT = 8, LGFS2_MT_GFS_INDIRECT = 9, LGFS2_MT_DIR_LEAF = 10, LGFS2_MT_JRNL_DATA = 11, LGFS2_MT_GFS2_LOG_HEADER = 12, LGFS2_MT_GFS_LOG_HEADER = 13, LGFS2_MT_GFS2_LOG_DESC = 14, 
LGFS2_MT_GFS_LOG_DESC = 15, LGFS2_MT_GFS2_LOG_BLOCK = 16, LGFS2_MT_EA_ATTR = 17, LGFS2_MT_EA_DATA = 18, LGFS2_MT_GFS2_QUOTA_CHANGE = 19, LGFS2_MT_DIRENT = 20, LGFS2_MT_EA_HEADER = 21, LGFS2_MT_GFS2_INUM_RANGE = 22, LGFS2_MT_STATFS_CHANGE = 23, LGFS2_MT_GFS_JINDEX = 24, LGFS2_MT_GFS_BLOCK_TAG = 25, LGFS2_MT_DATA = 26, LGFS2_MT_FREE = 27, LGFS2_MT_NR, }; struct lgfs2_symbolic { const uint32_t key; const char *value; }; struct lgfs2_metafield { const char *name; const unsigned offset; const unsigned length; const unsigned flags; #define LGFS2_MFF_RESERVED 0x00001 /* Field is reserved */ #define LGFS2_MFF_POINTER 0x00002 /* Field is a pointer to a block */ #define LGFS2_MFF_ENUM 0x00004 /* Field is an enum */ #define LGFS2_MFF_MASK 0x00008 /* Field is a bitmask */ #define LGFS2_MFF_UUID 0x00010 /* Field is a UUID */ #define LGFS2_MFF_STRING 0x00020 /* Field in an ASCII string */ #define LGFS2_MFF_UID 0x00040 /* Field is a UID */ #define LGFS2_MFF_GID 0x00080 /* Field is a GID */ #define LGFS2_MFF_MODE 0x00100 /* Field is a file mode */ #define LGFS2_MFF_FSBLOCKS 0x00200 /* Units are fs blocks */ #define LGFS2_MFF_BYTES 0x00400 /* Units are bytes */ #define LGFS2_MFF_SHIFT 0x00800 /* Log_{2} quantity */ #define LGFS2_MFF_CHECK 0x01000 /* Field is a checksum */ #define LGFS2_MFF_SECS 0x02000 /* Units are seconds */ #define LGFS2_MFF_NSECS 0x04000 /* Units are nsecs */ #define LGFS2_MFF_MAJOR 0x08000 /* Major device number */ #define LGFS2_MFF_MINOR 0x10000 /* Minor device number */ /* If it is a pointer, then this field must be set */ const unsigned points_to; /* If isenum or ismask are set, these must also be filled in */ const struct lgfs2_symbolic *symtab; const unsigned nsyms; }; struct lgfs2_metadata { const unsigned versions:2; #define LGFS2_MD_GFS1 0x01 #define LGFS2_MD_GFS2 0x02 const unsigned header:1; const uint32_t mh_type; const uint32_t mh_format; const char *name; const struct lgfs2_metafield *fields; const unsigned nfields; const unsigned size; }; struct 
lgfs2_dev_info { struct stat stat; unsigned readonly:1; long ra_pages; int soft_block_size; int logical_block_size; unsigned int physical_block_size; unsigned int io_min_size; unsigned int io_optimal_size; int io_align_offset; uint64_t size; }; struct device { uint64_t length; }; struct gfs2_bitmap { uint32_t bi_offset; /* The offset in the buffer of the first byte */ uint32_t bi_start; /* The position of the first byte in this block */ uint32_t bi_len; /* The number of bytes in this block */ }; struct rgrp_tree { struct osi_node node; uint64_t start; /* The offset of the beginning of this resource group */ uint64_t length; /* The length of this resource group */ struct gfs2_rindex ri; struct gfs2_rgrp rg; struct gfs2_bitmap *bits; struct gfs2_buffer_head **bh; }; struct lgfs2_rgrp_align { uint64_t base; uint64_t offset; }; typedef struct rgrp_tree *lgfs2_rgrp_t; typedef struct _lgfs2_rgrps *lgfs2_rgrps_t; extern lgfs2_rgrps_t lgfs2_rgrps_init(unsigned bsize, uint64_t start, uint64_t devlen, uint32_t rglen, struct lgfs2_rgrp_align *al); extern unsigned lgfs2_rgsize_for_data(uint64_t blksreq, unsigned bsize); extern lgfs2_rgrp_t lgfs2_rgrp_append(lgfs2_rgrps_t rgs, uint32_t rglen, int expand); extern int lgfs2_rgrp_write(int fd, lgfs2_rgrp_t rg, unsigned bsize); extern int lgfs2_rgrps_end(lgfs2_rgrps_t rgs); extern struct gfs2_rindex *lgfs2_rgrp_index(lgfs2_rgrp_t rg); // Temporary function to aid API migration extern struct osi_node *lgfs2_rgrps_root(lgfs2_rgrps_t rgs) __attribute__((deprecated)); struct gfs2_buffer_head { osi_list_t b_altlist; /* alternate list */ uint64_t b_blocknr; int b_modified; union { char *b_data; struct iovec iov; }; struct gfs2_sbd *sdp; }; struct special_blocks { osi_list_t list; uint64_t block; }; struct gfs2_sbd; struct gfs2_inode { int bh_owned; /* Is this bh owned, iow, should we release it later? 
*/ struct gfs2_dinode i_di; struct gfs2_buffer_head *i_bh; struct gfs2_sbd *i_sbd; }; /* FIXME not sure that i want to keep a record of the inodes or the * contents of them, or both ... if I need to write back to them, it * would be easier to hold the inode as well */ struct per_node { struct gfs2_inode *inum; struct gfs2_inum_range inum_range; struct gfs2_inode *statfs; struct gfs2_statfs_change statfs_change; struct gfs2_inode *unlinked; struct gfs2_inode *quota; struct gfs2_quota_change quota_change; }; struct master_dir { struct gfs2_inode *inum; uint64_t next_inum; struct gfs2_inode *statfs; struct gfs2_statfs_change statfs_change; struct gfs2_rindex rindex; struct gfs2_inode *qinode; struct gfs2_quota quotas; struct gfs2_inode *jiinode; struct gfs2_inode *riinode; struct gfs2_inode *rooti; struct gfs2_inode *pinode; struct gfs2_inode **journal; /* Array of journals */ uint32_t journals; /* Journal count */ struct per_node *pn; /* Array of per_node entries */ }; struct gfs2_sbd { struct gfs2_sb sd_sb; /* a copy of the ondisk structure */ char lockproto[GFS2_LOCKNAME_LEN]; char locktable[GFS2_LOCKNAME_LEN]; unsigned int bsize; /* The block size of the FS (in bytes) */ unsigned int jsize; /* Size of journals (in MB) */ unsigned int rgsize; /* Size of resource groups (in MB) */ unsigned int qcsize; /* Size of quota change files (in MB) */ int debug; int quiet; int expert; int override; char device_name[PATH_MAX]; char *path_name; /* Constants */ uint32_t sd_fsb2bb; uint32_t sd_fsb2bb_shift; uint32_t sd_diptrs; uint32_t sd_inptrs; uint32_t sd_jbsize; uint32_t sd_hash_bsize; uint32_t sd_hash_bsize_shift; uint32_t sd_hash_ptrs; uint32_t sd_blocks_per_bitmap; uint32_t sd_max_dirres; uint32_t sd_max_height; uint64_t sd_heightsize[GFS2_MAX_META_HEIGHT]; uint32_t sd_max_jheight; uint64_t sd_jheightsize[GFS2_MAX_META_HEIGHT]; /* Not specified on the command line, but... 
*/ int64_t time; struct lgfs2_dev_info dinfo; struct device device; int device_fd; int path_fd; uint64_t sb_addr; uint64_t fssize; uint64_t blks_total; uint64_t blks_alloced; uint64_t dinodes_alloced; uint64_t orig_rgrps; uint64_t rgrps; uint64_t new_rgrps; struct osi_root rgtree; struct osi_root rgcalc; unsigned int orig_journals; struct gfs2_inode *master_dir; struct master_dir md; int metafs_fd; char metafs_path[PATH_MAX]; /* where metafs is mounted */ struct special_blocks eattr_blocks; uint64_t rg_one_length; uint64_t rg_length; int gfs1; }; struct metapath { unsigned int mp_list[GFS2_MAX_META_HEIGHT]; }; #define GFS2_DEFAULT_BSIZE (4096) #define GFS2_DEFAULT_JSIZE (128) #define GFS2_DEFAULT_RGSIZE (256) #define GFS2_DEFAULT_UTSIZE (1) #define GFS2_DEFAULT_QCSIZE (1) #define GFS2_DEFAULT_LOCKPROTO "lock_dlm" #define GFS2_MIN_GROW_SIZE (10) #define GFS2_EXCESSIVE_RGS (10000) #define GFS2_EXP_MIN_RGSIZE (1) #define GFS2_MIN_RGSIZE (32) /* Look at this! Why can't we go bigger than 2GB? 
*/ #define GFS2_MAX_RGSIZE (2048) /* meta.c */ extern const struct lgfs2_metadata lgfs2_metadata[]; extern const unsigned lgfs2_metadata_size; extern const struct lgfs2_symbolic lgfs2_metatypes[]; extern const unsigned lgfs2_metatype_size; extern const struct lgfs2_symbolic lgfs2_metaformats[]; extern const unsigned lgfs2_metaformat_size; extern const struct lgfs2_symbolic lgfs2_di_flags[]; extern const unsigned lgfs2_di_flag_size; extern const struct lgfs2_symbolic lgfs2_lh_flags[]; extern const unsigned lgfs2_lh_flag_size; extern const struct lgfs2_symbolic lgfs2_ld_types[]; extern const unsigned lgfs2_ld_type_size; extern const struct lgfs2_symbolic lgfs2_ld1_types[]; extern const unsigned lgfs2_ld1_type_size; extern int lgfs2_selfcheck(void); extern const struct lgfs2_metadata *lgfs2_find_mtype(uint32_t mh_type, const unsigned versions); extern const struct lgfs2_metadata *lgfs2_find_mtype_name(const char *name, const unsigned versions); /* bitmap.c */ struct gfs2_bmap { uint64_t size; uint64_t mapsize; unsigned char *map; }; /* block_list.c */ extern struct special_blocks *blockfind(struct special_blocks *blist, uint64_t num); extern void gfs2_special_add(struct special_blocks *blocklist, uint64_t block); extern void gfs2_special_set(struct special_blocks *blocklist, uint64_t block); extern void gfs2_special_free(struct special_blocks *blist); extern void gfs2_special_clear(struct special_blocks *blocklist, uint64_t block); /* buf.c */ extern struct gfs2_buffer_head *bget(struct gfs2_sbd *sdp, uint64_t num); extern struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num, int line, const char *caller); extern int __breadm(struct gfs2_sbd *sdp, struct gfs2_buffer_head **bhs, size_t n, uint64_t block, int line, const char *caller); extern int bwrite(struct gfs2_buffer_head *bh); extern int brelse(struct gfs2_buffer_head *bh); extern uint32_t lgfs2_get_block_type(const struct gfs2_buffer_head *lbh); #define bmodified(bh) do { bh->b_modified = 1; } 
while(0) #define bread(bl, num) __bread(bl, num, __LINE__, __FUNCTION__) #define breadm(bl, bhs, n, block) __breadm(bl, bhs, n, block, __LINE__, __FUNCTION__) /* device_geometry.c */ extern int lgfs2_get_dev_info(int fd, struct lgfs2_dev_info *i); extern void fix_device_geometry(struct gfs2_sbd *sdp); /* fs_bits.c */ #define BFITNOENT (0xFFFFFFFF) /* functions with blk #'s that are buffer relative */ extern unsigned long gfs2_bitfit(const unsigned char *buffer, const unsigned int buflen, unsigned long goal, unsigned char old_state); /* functions with blk #'s that are rgrp relative */ extern uint32_t gfs2_blkalloc_internal(struct rgrp_tree *rgd, uint32_t goal, unsigned char old_state, unsigned char new_state, int do_it); extern int gfs2_check_range(struct gfs2_sbd *sdp, uint64_t blkno); /* functions with blk #'s that are file system relative */ extern int lgfs2_get_bitmap(struct gfs2_sbd *sdp, uint64_t blkno, struct rgrp_tree *rgd); extern int gfs2_set_bitmap(struct gfs2_sbd *sdp, uint64_t blkno, int state); /* fs_geometry.c */ extern uint32_t rgblocks2bitblocks(const unsigned int bsize, const uint32_t rgblocks, uint32_t *ri_data) __attribute__((nonnull(3))); extern uint64_t how_many_rgrps(struct gfs2_sbd *sdp, struct device *dev, int rgsize_specified); extern void compute_rgrp_layout(struct gfs2_sbd *sdp, struct osi_root *rgtree, int rgsize_specified); extern void build_rgrps(struct gfs2_sbd *sdp, int write); /* fs_ops.c */ #define IS_LEAF (1) #define IS_DINODE (2) extern struct metapath *find_metapath(struct gfs2_inode *ip, uint64_t block); extern void lookup_block(struct gfs2_inode *ip, struct gfs2_buffer_head *bh, unsigned int height, struct metapath *mp, int create, int *new, uint64_t *block); extern struct gfs2_inode *lgfs2_inode_get(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh); extern struct gfs2_inode *lgfs2_inode_read(struct gfs2_sbd *sdp, uint64_t di_addr); extern struct gfs2_inode *is_system_inode(struct gfs2_sbd *sdp, uint64_t block); extern void 
inode_put(struct gfs2_inode **ip); extern uint64_t data_alloc(struct gfs2_inode *ip); extern uint64_t meta_alloc(struct gfs2_inode *ip); extern int lgfs2_dinode_alloc(struct gfs2_sbd *sdp, const uint64_t blksreq, uint64_t *blkno); extern int gfs2_readi(struct gfs2_inode *ip, void *buf, uint64_t offset, unsigned int size); #define gfs2_writei(ip, buf, offset, size) \ __gfs2_writei(ip, buf, offset, size, 1) extern int __gfs2_writei(struct gfs2_inode *ip, void *buf, uint64_t offset, unsigned int size, int resize); extern struct gfs2_buffer_head *get_file_buf(struct gfs2_inode *ip, uint64_t lbn, int prealloc); extern struct gfs2_buffer_head *init_dinode(struct gfs2_sbd *sdp, struct gfs2_inum *inum, unsigned int mode, uint32_t flags, struct gfs2_inum *parent); extern struct gfs2_inode *createi(struct gfs2_inode *dip, const char *filename, unsigned int mode, uint32_t flags); extern struct gfs2_inode *gfs_createi(struct gfs2_inode *dip, const char *filename, unsigned int mode, uint32_t flags); extern void dirent2_del(struct gfs2_inode *dip, struct gfs2_buffer_head *bh, struct gfs2_dirent *prev, struct gfs2_dirent *cur); extern int dir_search(struct gfs2_inode *dip, const char *filename, int len, unsigned int *type, struct gfs2_inum *inum); extern int gfs2_lookupi(struct gfs2_inode *dip, const char *filename, int len, struct gfs2_inode **ipp); extern int dir_add(struct gfs2_inode *dip, const char *filename, int len, struct gfs2_inum *inum, unsigned int type); extern int gfs2_dirent_del(struct gfs2_inode *dip, const char *filename, int filename_len); extern void block_map(struct gfs2_inode *ip, uint64_t lblock, int *new, uint64_t *dblock, uint32_t *extlen, int prealloc); extern void gfs2_get_leaf_nr(struct gfs2_inode *dip, uint32_t index, uint64_t *leaf_out); extern void gfs2_put_leaf_nr(struct gfs2_inode *dip, uint32_t inx, uint64_t leaf_out); extern void dir_split_leaf(struct gfs2_inode *dip, uint32_t start, uint64_t leaf_no, struct gfs2_buffer_head *obh); extern void 
gfs2_free_block(struct gfs2_sbd *sdp, uint64_t block); extern int gfs2_freedi(struct gfs2_sbd *sdp, uint64_t block); extern int gfs2_get_leaf(struct gfs2_inode *dip, uint64_t leaf_no, struct gfs2_buffer_head **bhp); extern int gfs2_dirent_first(struct gfs2_inode *dip, struct gfs2_buffer_head *bh, struct gfs2_dirent **dent); extern int gfs2_dirent_next(struct gfs2_inode *dip, struct gfs2_buffer_head *bh, struct gfs2_dirent **dent); extern void build_height(struct gfs2_inode *ip, int height); extern void unstuff_dinode(struct gfs2_inode *ip); extern unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size); extern int write_journal(struct gfs2_sbd *sdp, unsigned int j, unsigned int blocks); /* gfs1.c - GFS1 backward compatibility structures and functions */ #define GFS_FORMAT_SB (100) /* Super-Block */ #define GFS_METATYPE_SB (1) /* Super-Block */ #define GFS_FORMAT_FS (1309) /* Filesystem (all-encompassing) */ #define GFS_FORMAT_MULTI (1401) /* Multi-Host */ /* GFS1 Dinode types */ #define GFS_FILE_NON (0) #define GFS_FILE_REG (1) /* regular file */ #define GFS_FILE_DIR (2) /* directory */ #define GFS_FILE_LNK (5) /* link */ #define GFS_FILE_BLK (7) /* block device node */ #define GFS_FILE_CHR (8) /* character device node */ #define GFS_FILE_FIFO (101) /* fifo/pipe */ #define GFS_FILE_SOCK (102) /* socket */ /* GFS 1 journal block types: */ #define GFS_LOG_DESC_METADATA (300) /* metadata */ #define GFS_LOG_DESC_IUL (400) /* unlinked inode */ #define GFS_LOG_DESC_IDA (401) /* de-allocated inode */ #define GFS_LOG_DESC_Q (402) /* quota */ #define GFS_LOG_DESC_LAST (500) /* final in a logged transaction */ struct gfs_indirect { struct gfs2_meta_header in_header; char in_reserved[64]; }; struct gfs_dinode { struct gfs2_meta_header di_header; struct gfs2_inum di_num; /* formal inode # and block address */ __be32 di_mode; /* mode of file */ __be32 di_uid; /* owner's user id */ __be32 di_gid; /* owner's group id */ __be32 di_nlink; /* number (qty) of links to 
this file */ __be64 di_size; /* number (qty) of bytes in file */ __be64 di_blocks; /* number (qty) of blocks in file */ __be64 di_atime; /* time last accessed */ __be64 di_mtime; /* time last modified */ __be64 di_ctime; /* time last changed */ /* Non-zero only for character or block device nodes */ __be32 di_major; /* device major number */ __be32 di_minor; /* device minor number */ /* Block allocation strategy */ __be64 di_rgrp; /* dinode rgrp block number */ __be64 di_goal_rgrp; /* rgrp to alloc from next */ __be32 di_goal_dblk; /* data block goal */ __be32 di_goal_mblk; /* metadata block goal */ __be32 di_flags; /* GFS_DIF_... */ /* struct gfs_rindex, struct gfs_jindex, or struct gfs_dirent */ __be32 di_payload_format; /* GFS_FORMAT_... */ __be16 di_type; /* GFS_FILE_... type of file */ __be16 di_height; /* height of metadata (0 == stuffed) */ __be32 di_incarn; /* incarnation (unused, see gfs_meta_header) */ __be16 di_pad; /* These only apply to directories */ __be16 di_depth; /* Number of bits in the table */ __be32 di_entries; /* The # (qty) of entries in the directory */ /* This formed an on-disk chain of unused dinodes */ struct gfs2_inum di_next_unused; /* used in old versions only */ __be64 di_eattr; /* extended attribute block number */ char di_reserved[56]; }; struct gfs_sb { /* Order is important; need to be able to read old superblocks in order to support on-disk version upgrades */ struct gfs2_meta_header sb_header; __be32 sb_fs_format; /* GFS_FORMAT_FS (on-disk version) */ __be32 sb_multihost_format; /* GFS_FORMAT_MULTI */ __be32 sb_flags; /* ?? */ __be32 sb_bsize; /* fundamental FS block size in bytes */ __be32 sb_bsize_shift; /* log2(sb_bsize) */ __be32 sb_seg_size; /* Journal segment size in FS blocks */ /* These special inodes do not appear in any on-disk directory. 
*/ struct gfs2_inum sb_jindex_di; /* journal index inode */ struct gfs2_inum sb_rindex_di; /* resource group index inode */ struct gfs2_inum sb_root_di; /* root directory inode */ /* Default inter-node locking protocol (lock module) and namespace */ uint8_t sb_lockproto[GFS2_LOCKNAME_LEN]; /* lock protocol name */ uint8_t sb_locktable[GFS2_LOCKNAME_LEN]; /* unique name for this FS */ /* More special inodes */ struct gfs2_inum sb_quota_di; /* quota inode */ struct gfs2_inum sb_license_di; /* license inode */ char sb_reserved[96]; }; struct gfs_rgrp { struct gfs2_meta_header rg_header; __be32 rg_flags; __be32 rg_free; /* Number (qty) of free data blocks */ /* Dinodes are USEDMETA, but are handled separately from other METAs */ __be32 rg_useddi; /* Number (qty) of dinodes (used or free) */ __be32 rg_freedi; /* Number (qty) of unused (free) dinodes */ struct gfs2_inum rg_freedi_list; /* 1st block in chain of free dinodes */ /* These META statistics do not include dinodes (used or free) */ __be32 rg_usedmeta; /* Number (qty) of used metadata blocks */ __be32 rg_freemeta; /* Number (qty) of unused metadata blocks */ char rg_reserved[64]; }; struct gfs_log_header { struct gfs2_meta_header lh_header; __be32 lh_flags; /* GFS_LOG_HEAD_... */ __be32 lh_pad; __be64 lh_first; /* Block number of first header in this trans */ __be64 lh_sequence; /* Sequence number of this transaction */ __be64 lh_tail; /* Block number of log tail */ __be64 lh_last_dump; /* Block number of last dump */ uint8_t lh_reserved[64]; }; struct gfs_jindex { __be64 ji_addr; /* starting block of the journal */ __be32 ji_nsegment; /* number (quantity) of segments in journal */ __be32 ji_pad; uint8_t ji_reserved[64]; }; struct gfs_log_descriptor { struct gfs2_meta_header ld_header; __be32 ld_type; /* GFS_LOG_DESC_... 
Type of this log chunk */ __be32 ld_length; /* Number of buffers in this chunk */ __be32 ld_data1; /* descriptor-specific field */ __be32 ld_data2; /* descriptor-specific field */ uint8_t ld_reserved[64]; }; extern int is_gfs_dir(struct gfs2_dinode *dinode); extern void gfs1_lookup_block(struct gfs2_inode *ip, struct gfs2_buffer_head *bh, unsigned int height, struct metapath *mp, int create, int *new, uint64_t *block); extern void gfs1_block_map(struct gfs2_inode *ip, uint64_t lblock, int *new, uint64_t *dblock, uint32_t *extlen, int prealloc); extern int gfs1_writei(struct gfs2_inode *ip, char *buf, uint64_t offset, unsigned int size); extern int gfs1_ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int quiet); extern struct gfs2_inode *lgfs2_gfs_inode_get(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh); extern struct gfs2_inode *lgfs2_gfs_inode_read(struct gfs2_sbd *sdp, uint64_t di_addr); extern void gfs_jindex_in(struct gfs_jindex *jindex, char *buf); extern void gfs_rgrp_in(struct gfs_rgrp *rg, struct gfs2_buffer_head *bh); extern void gfs_rgrp_out(struct gfs_rgrp *rg, struct gfs2_buffer_head *bh); extern void gfs_put_leaf_nr(struct gfs2_inode *dip, uint32_t inx, uint64_t leaf_out); /* gfs2_log.c */ extern int print_level; #define MSG_DEBUG 7 #define MSG_INFO 6 #define MSG_NOTICE 5 #define MSG_WARN 4 #define MSG_ERROR 3 #define MSG_CRITICAL 2 #define MSG_NULL 1 #define log_debug(format...) \ do { if (print_level >= MSG_DEBUG) { \ printf("(%s:%d) ", __FUNCTION__, __LINE__); \ printf(format); } } while(0) #define log_info(format...) \ do { if (print_level >= MSG_INFO) printf(format); } while(0) #define log_notice(format...) \ do { if (print_level >= MSG_NOTICE) printf(format); } while(0) #define log_warn(format...) \ do { if (print_level >= MSG_WARN) printf(format); } while(0) #define log_err(format...) \ do { if (print_level >= MSG_ERROR) fprintf(stderr, format); } while(0) #define log_crit(format...) 
\ do { if (print_level >= MSG_CRITICAL) fprintf(stderr, format); } while(0) extern void increase_verbosity(void); extern void decrease_verbosity(void); /* misc.c */ extern int compute_heightsize(struct gfs2_sbd *sdp, uint64_t *heightsize, uint32_t *maxheight, uint32_t bsize1, int diptrs, int inptrs); extern int compute_constants(struct gfs2_sbd *sdp); extern int is_pathname_mounted(struct gfs2_sbd *sdp, int *ro_mount); extern int is_gfs2(struct gfs2_sbd *sdp); extern int find_gfs2_meta(struct gfs2_sbd *sdp); extern int dir_exists(const char *dir); extern int check_for_gfs2(struct gfs2_sbd *sdp); extern int mount_gfs2_meta(struct gfs2_sbd *sdp); extern void cleanup_metafs(struct gfs2_sbd *sdp); extern int set_sysfs(const char *fsname, const char *filename, const char *val); extern int is_fsname(char *name); extern void get_random_bytes(void *buf, int nbytes); /* recovery.c */ extern void gfs2_replay_incr_blk(struct gfs2_inode *ip, unsigned int *blk); extern int gfs2_replay_read_block(struct gfs2_inode *ip, unsigned int blk, struct gfs2_buffer_head **bh); extern int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where); extern int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where); extern void gfs2_revoke_clean(struct gfs2_sbd *sdp); extern int get_log_header(struct gfs2_inode *ip, unsigned int blk, struct gfs2_log_header *head); extern int find_good_lh(struct gfs2_inode *ip, unsigned int *blk, struct gfs2_log_header *head); extern int jhead_scan(struct gfs2_inode *ip, struct gfs2_log_header *head); extern int gfs2_find_jhead(struct gfs2_inode *ip, struct gfs2_log_header *head); extern int clean_journal(struct gfs2_inode *ip, struct gfs2_log_header *head); /* rgrp.c */ extern int gfs2_compute_bitstructs(const uint32_t bsize, struct rgrp_tree *rgd); extern struct rgrp_tree *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk); extern uint64_t gfs2_rgrp_read(struct gfs2_sbd *sdp, struct rgrp_tree *rgd); extern void 
gfs2_rgrp_relse(struct rgrp_tree *rgd); extern struct rgrp_tree *rgrp_insert(struct osi_root *rgtree, uint64_t rgblock); extern void gfs2_rgrp_free(struct osi_root *rgrp_tree); /* figure out the size of the given resource group, in blocks */ static inline unsigned int rgrp_size(struct rgrp_tree *rgrp) { return rgrp->ri.ri_data + rgrp->ri.ri_length; } /* structures.c */ extern int build_master(struct gfs2_sbd *sdp); extern void build_sb(struct gfs2_sbd *sdp, const unsigned char *uuid); extern int build_journal(struct gfs2_sbd *sdp, int j, struct gfs2_inode *jindex); extern int build_jindex(struct gfs2_sbd *sdp); extern int build_per_node(struct gfs2_sbd *sdp); extern int build_inum(struct gfs2_sbd *sdp); extern int build_statfs(struct gfs2_sbd *sdp); extern int build_rindex(struct gfs2_sbd *sdp); extern int build_quota(struct gfs2_sbd *sdp); extern int build_root(struct gfs2_sbd *sdp); extern int do_init_inum(struct gfs2_sbd *sdp); extern int do_init_statfs(struct gfs2_sbd *sdp); extern int gfs2_check_meta(struct gfs2_buffer_head *bh, int type); extern unsigned lgfs2_bm_scan(struct rgrp_tree *rgd, unsigned idx, uint64_t *buf, uint8_t state); /* super.c */ extern int check_sb(struct gfs2_sb *sb); extern int read_sb(struct gfs2_sbd *sdp); extern int rindex_read(struct gfs2_sbd *sdp, int fd, int *count1, int *sane); extern int ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int *sane); extern int write_sb(struct gfs2_sbd *sdp); /* ondisk.c */ extern uint32_t gfs2_disk_hash(const char *data, int len); extern const char *str_uuid(const unsigned char *uuid); extern void gfs2_print_uuid(const unsigned char *uuid); extern void print_it(const char *label, const char *fmt, const char *fmt2, ...) 
__attribute__((format(printf,2,4))); /* Translation functions */ extern void gfs2_inum_in(struct gfs2_inum *no, char *buf); extern void gfs2_inum_out(struct gfs2_inum *no, char *buf); extern void gfs2_meta_header_in(struct gfs2_meta_header *mh, struct gfs2_buffer_head *bh); extern void gfs2_meta_header_out(const struct gfs2_meta_header *mh, char *buf); extern void gfs2_meta_header_out_bh(const struct gfs2_meta_header *mh, struct gfs2_buffer_head *bh); extern void gfs2_sb_in(struct gfs2_sb *sb, struct gfs2_buffer_head *bh); extern void gfs2_sb_out(struct gfs2_sb *sb, struct gfs2_buffer_head *bh); extern void gfs2_rindex_in(struct gfs2_rindex *ri, char *buf); extern void gfs2_rindex_out(struct gfs2_rindex *ri, char *buf); extern void gfs2_rgrp_in(struct gfs2_rgrp *rg, struct gfs2_buffer_head *bh); extern void gfs2_rgrp_out(const struct gfs2_rgrp *rg, char *buf); extern void gfs2_rgrp_out_bh(const struct gfs2_rgrp *rg, struct gfs2_buffer_head *bh); extern void gfs2_quota_in(struct gfs2_quota *qu, char *buf); extern void gfs2_quota_out(struct gfs2_quota *qu, char *buf); extern void gfs2_dinode_in(struct gfs2_dinode *di, struct gfs2_buffer_head *bh); extern void gfs2_dinode_out(struct gfs2_dinode *di, struct gfs2_buffer_head *bh); extern void gfs2_dirent_in(struct gfs2_dirent *de, char *buf); extern void gfs2_dirent_out(struct gfs2_dirent *de, char *buf); extern void gfs2_leaf_in(struct gfs2_leaf *lf, struct gfs2_buffer_head *bh); extern void gfs2_leaf_out(struct gfs2_leaf *lf, struct gfs2_buffer_head *bh); extern void gfs2_ea_header_in(struct gfs2_ea_header *ea, char *buf); extern void gfs2_log_header_in(struct gfs2_log_header *lh, struct gfs2_buffer_head *bh); extern void gfs2_log_header_out(struct gfs2_log_header *lh, struct gfs2_buffer_head *bh); extern void gfs2_log_descriptor_in(struct gfs2_log_descriptor *ld, struct gfs2_buffer_head *bh); extern void gfs2_log_descriptor_out(struct gfs2_log_descriptor *ld, struct gfs2_buffer_head *bh); extern void 
gfs2_statfs_change_in(struct gfs2_statfs_change *sc, char *buf); extern void gfs2_statfs_change_out(struct gfs2_statfs_change *sc, char *buf); extern void gfs2_quota_change_in(struct gfs2_quota_change *qc, struct gfs2_buffer_head *bh); extern void gfs2_quota_change_out(struct gfs2_quota_change *qc, struct gfs2_buffer_head *bh); /* Printing functions */ extern void gfs2_inum_print(struct gfs2_inum *no); extern void gfs2_meta_header_print(struct gfs2_meta_header *mh); extern void gfs2_sb_print(struct gfs2_sb *sb); extern void gfs2_rindex_print(struct gfs2_rindex *ri); extern void gfs2_rgrp_print(struct gfs2_rgrp *rg); extern void gfs2_quota_print(struct gfs2_quota *qu); extern void gfs2_dinode_print(struct gfs2_dinode *di); extern void gfs2_leaf_print(struct gfs2_leaf *lf); extern void gfs2_ea_header_print(struct gfs2_ea_header *ea, char *name); extern void gfs2_log_header_print(struct gfs2_log_header *lh); extern void gfs2_log_descriptor_print(struct gfs2_log_descriptor *ld); extern void gfs2_statfs_change_print(struct gfs2_statfs_change *sc); extern void gfs2_quota_change_print(struct gfs2_quota_change *qc); /* Language functions */ struct lgfs2_lang_state; struct lgfs2_lang_result { uint64_t lr_blocknr; struct gfs2_buffer_head *lr_bh; const struct lgfs2_metadata *lr_mtype; int lr_state; // GFS2_BLKST_* }; extern struct lgfs2_lang_state *lgfs2_lang_init(void); extern int lgfs2_lang_parsef(struct lgfs2_lang_state *state, FILE *script); extern int lgfs2_lang_parses(struct lgfs2_lang_state *state, const char *script); extern struct lgfs2_lang_result *lgfs2_lang_result_next(struct lgfs2_lang_state *state, struct gfs2_sbd *sbd); extern int lgfs2_lang_result_print(struct lgfs2_lang_result *result); extern void lgfs2_lang_result_free(struct lgfs2_lang_result **result); extern void lgfs2_lang_free(struct lgfs2_lang_state **state); __END_DECLS #endif /* __LIBGFS2_DOT_H__ */ gfs2-utils/gfs2/libgfs2/meta.c0000664000175000017500000005264212154127655015033 0ustar 
andyandy#include #include #include "libgfs2.h" #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) #define SYM(x) { x, #x }, const struct lgfs2_symbolic lgfs2_metatypes[] = { SYM(GFS2_METATYPE_NONE) SYM(GFS2_METATYPE_SB) SYM(GFS2_METATYPE_RG) SYM(GFS2_METATYPE_RB) SYM(GFS2_METATYPE_DI) SYM(GFS2_METATYPE_IN) SYM(GFS2_METATYPE_LF) SYM(GFS2_METATYPE_JD) SYM(GFS2_METATYPE_LH) SYM(GFS2_METATYPE_LD) SYM(GFS2_METATYPE_LB) SYM(GFS2_METATYPE_EA) SYM(GFS2_METATYPE_ED) SYM(GFS2_METATYPE_QC) }; const unsigned lgfs2_metatype_size = ARRAY_SIZE(lgfs2_metatypes); const struct lgfs2_symbolic lgfs2_metaformats[] = { SYM(GFS2_FORMAT_NONE) SYM(GFS2_FORMAT_SB) SYM(GFS2_FORMAT_RG) SYM(GFS2_FORMAT_RB) SYM(GFS2_FORMAT_DI) SYM(GFS2_FORMAT_IN) SYM(GFS2_FORMAT_LF) SYM(GFS2_FORMAT_JD) SYM(GFS2_FORMAT_LH) SYM(GFS2_FORMAT_LD) SYM(GFS2_FORMAT_LB) SYM(GFS2_FORMAT_EA) SYM(GFS2_FORMAT_ED) SYM(GFS2_FORMAT_QC) SYM(GFS2_FORMAT_RI) SYM(GFS2_FORMAT_DE) SYM(GFS2_FORMAT_QU) }; const unsigned lgfs2_metaformat_size = ARRAY_SIZE(lgfs2_metaformats); const struct lgfs2_symbolic lgfs2_di_flags[] = { SYM(GFS2_DIF_JDATA) SYM(GFS2_DIF_EXHASH) SYM(GFS2_DIF_UNUSED) SYM(GFS2_DIF_EA_INDIRECT) SYM(GFS2_DIF_DIRECTIO) SYM(GFS2_DIF_IMMUTABLE) SYM(GFS2_DIF_APPENDONLY) SYM(GFS2_DIF_NOATIME) SYM(GFS2_DIF_SYNC) SYM(GFS2_DIF_SYSTEM) SYM(GFS2_DIF_TRUNC_IN_PROG) SYM(GFS2_DIF_INHERIT_DIRECTIO) SYM(GFS2_DIF_INHERIT_JDATA) }; const unsigned lgfs2_di_flag_size = ARRAY_SIZE(lgfs2_di_flags); const struct lgfs2_symbolic lgfs2_lh_flags[] = { SYM(GFS2_LOG_HEAD_UNMOUNT) }; const unsigned int lgfs2_lh_flag_size = ARRAY_SIZE(lgfs2_lh_flags); const struct lgfs2_symbolic lgfs2_ld_types[] = { SYM(GFS2_LOG_DESC_METADATA) SYM(GFS2_LOG_DESC_REVOKE) SYM(GFS2_LOG_DESC_JDATA) }; const unsigned int lgfs2_ld_type_size = ARRAY_SIZE(lgfs2_ld_types); const struct lgfs2_symbolic lgfs2_ld1_types[] = { SYM(GFS_LOG_DESC_METADATA) SYM(GFS_LOG_DESC_IUL) SYM(GFS_LOG_DESC_IDA) SYM(GFS_LOG_DESC_Q) SYM(GFS_LOG_DESC_LAST) }; const unsigned int lgfs2_ld1_type_size = 
ARRAY_SIZE(lgfs2_ld1_types); #undef SYM #define F(f,...) { .name = #f, \ .offset = offsetof(struct STRUCT, f), \ .length = sizeof(((struct STRUCT *)(0))->f), \ __VA_ARGS__ }, #define FP(f,...) F(f, .flags = LGFS2_MFF_POINTER, __VA_ARGS__) #define RF(f) F(f, .flags = LGFS2_MFF_RESERVED) #define RFP(f,...) F(f, .flags = LGFS2_MFF_POINTER|LGFS2_MFF_RESERVED, __VA_ARGS__) #define MH(f) F(f.mh_magic) \ F(f.mh_type, .flags = LGFS2_MFF_ENUM, .symtab=lgfs2_metatypes, .nsyms=ARRAY_SIZE(lgfs2_metatypes)) \ RF(f.__pad0) \ F(f.mh_format, .flags = LGFS2_MFF_ENUM, .symtab=lgfs2_metaformats, .nsyms=ARRAY_SIZE(lgfs2_metaformats)) \ F(f.mh_jid) #define IN(f,...) F(f.no_formal_ino) \ FP(f.no_addr, __VA_ARGS__) #define INR(f,...) RF(f.no_formal_ino) \ RFP(f.no_addr, __VA_ARGS__) #define ANY_COMMON_BLOCK (1 << LGFS2_MT_DIR_LEAF) | \ (1 << LGFS2_MT_JRNL_DATA) | \ (1 << LGFS2_MT_EA_ATTR) | \ (1 << LGFS2_MT_EA_DATA) | \ (1 << LGFS2_MT_DATA) #define ANY_GFS2_BLOCK (1 << LGFS2_MT_GFS2_DINODE) | \ (1 << LGFS2_MT_GFS2_INDIRECT) | \ (1 << LGFS2_MT_GFS2_LOG_HEADER) | \ (1 << LGFS2_MT_GFS2_LOG_DESC) | \ (1 << LGFS2_MT_GFS2_LOG_BLOCK) | \ ANY_COMMON_BLOCK #define ANY_GFS_BLOCK (1 << LGFS2_MT_GFS_DINODE) | \ (1 << LGFS2_MT_GFS_INDIRECT) | \ ANY_COMMON_BLOCK #undef STRUCT #define STRUCT gfs2_sb static const struct lgfs2_metafield gfs2_sb_fields[] = { MH(sb_header) F(sb_fs_format) F(sb_multihost_format) RF(__pad0) F(sb_bsize, .flags = LGFS2_MFF_BYTES) F(sb_bsize_shift, .flags = LGFS2_MFF_BYTES|LGFS2_MFF_SHIFT) RF(__pad1) IN(sb_master_dir, .points_to = (1 << LGFS2_MT_GFS2_DINODE)) INR(__pad2, .points_to = (1 << LGFS2_MT_GFS2_DINODE)) IN(sb_root_dir, .points_to = (1 << LGFS2_MT_GFS2_DINODE)) F(sb_lockproto, .flags = LGFS2_MFF_STRING) F(sb_locktable, .flags = LGFS2_MFF_STRING) INR(__pad3, .points_to = (1 << LGFS2_MT_GFS2_DINODE)) INR(__pad4, .points_to = (1 << LGFS2_MT_GFS2_DINODE)) F(sb_uuid, .flags = LGFS2_MFF_UUID) }; #undef STRUCT #define STRUCT gfs_sb static const struct lgfs2_metafield 
gfs_sb_fields[] = { MH(sb_header) F(sb_fs_format) F(sb_multihost_format) F(sb_flags) F(sb_bsize, .flags = LGFS2_MFF_BYTES) F(sb_bsize_shift, .flags = LGFS2_MFF_BYTES|LGFS2_MFF_SHIFT) F(sb_seg_size, .flags = LGFS2_MFF_FSBLOCKS) IN(sb_jindex_di, .points_to = (1 << LGFS2_MT_GFS_DINODE)) IN(sb_rindex_di, .points_to = (1 << LGFS2_MT_GFS_DINODE)) IN(sb_root_di, .points_to = (1 << LGFS2_MT_GFS_DINODE)) F(sb_lockproto, .flags = LGFS2_MFF_STRING) F(sb_locktable, .flags = LGFS2_MFF_STRING) IN(sb_quota_di, .points_to = (1 << LGFS2_MT_GFS_DINODE)) IN(sb_license_di, .points_to = (1 << LGFS2_MT_GFS_DINODE)) RF(sb_reserved) }; #undef STRUCT #define STRUCT gfs2_rindex static const struct lgfs2_metafield gfs2_rindex_fields[] = { FP(ri_addr, .points_to = (1 << LGFS2_MT_GFS2_RGRP)) F(ri_length, .flags = LGFS2_MFF_FSBLOCKS) RF(__pad) FP(ri_data0, .points_to = ANY_GFS2_BLOCK|(1 << LGFS2_MT_FREE)) F(ri_data, .flags = LGFS2_MFF_FSBLOCKS) F(ri_bitbytes, .flags = LGFS2_MFF_BYTES) F(ri_reserved) }; #undef STRUCT #define STRUCT gfs2_rgrp static const struct lgfs2_metafield gfs2_rgrp_fields[] = { MH(rg_header) F(rg_flags) F(rg_free, .flags = LGFS2_MFF_FSBLOCKS) F(rg_dinodes, .flags = LGFS2_MFF_FSBLOCKS) RF(__pad) F(rg_igeneration) RF(rg_reserved) }; #undef STRUCT #define STRUCT gfs_rgrp static const struct lgfs2_metafield gfs_rgrp_fields[] = { MH(rg_header) F(rg_flags) F(rg_free, .flags = LGFS2_MFF_FSBLOCKS) F(rg_useddi, .flags = LGFS2_MFF_FSBLOCKS) F(rg_freedi, .flags = LGFS2_MFF_FSBLOCKS) IN(rg_freedi_list, .points_to = (1 << LGFS2_MT_GFS_DINODE)) F(rg_usedmeta, .flags = LGFS2_MFF_FSBLOCKS) F(rg_freemeta, .flags = LGFS2_MFF_FSBLOCKS) RF(rg_reserved) }; #undef STRUCT struct gfs2_rgrp_bitmap { struct gfs2_meta_header rb_header; }; #define STRUCT gfs2_rgrp_bitmap static const struct lgfs2_metafield gfs2_rgrp_bitmap_fields[] = { MH(rb_header) }; #undef STRUCT #define STRUCT gfs2_dinode static const struct lgfs2_metafield gfs2_dinode_fields[] = { MH(di_header) IN(di_num, .points_to = (1 << 
LGFS2_MT_GFS2_DINODE)) F(di_mode, .flags = LGFS2_MFF_MODE) F(di_uid, .flags = LGFS2_MFF_UID) F(di_gid, .flags = LGFS2_MFF_GID) F(di_nlink) F(di_size, .flags = LGFS2_MFF_BYTES) F(di_blocks, .flags = LGFS2_MFF_FSBLOCKS) F(di_atime, .flags = LGFS2_MFF_SECS) F(di_mtime, .flags = LGFS2_MFF_SECS) F(di_ctime, .flags = LGFS2_MFF_SECS) F(di_major, .flags = LGFS2_MFF_MAJOR) F(di_minor, .flags = LGFS2_MFF_MINOR) FP(di_goal_meta, .points_to = ANY_GFS2_BLOCK | (1 << LGFS2_MT_FREE)) FP(di_goal_data, .points_to = ANY_GFS2_BLOCK | (1 << LGFS2_MT_FREE)) F(di_generation) F(di_flags, .flags = LGFS2_MFF_MASK, .symtab=lgfs2_di_flags, .nsyms=ARRAY_SIZE(lgfs2_di_flags)) F(di_payload_format) RF(__pad1) F(di_height) RF(__pad2) RF(__pad3) F(di_depth) F(di_entries) INR(__pad4, .points_to = (1 << LGFS2_MT_GFS2_DINODE)) FP(di_eattr, .points_to = (1 << LGFS2_MT_EA_ATTR)|(1 << LGFS2_MT_GFS2_INDIRECT)) F(di_atime_nsec, .flags = LGFS2_MFF_NSECS) F(di_mtime_nsec, .flags = LGFS2_MFF_NSECS) F(di_ctime_nsec, .flags = LGFS2_MFF_NSECS) RF(di_reserved) }; #undef STRUCT #define STRUCT gfs_dinode static const struct lgfs2_metafield gfs_dinode_fields[] = { MH(di_header) IN(di_num, .points_to = (1 << LGFS2_MT_GFS_DINODE)) F(di_mode, .flags = LGFS2_MFF_MODE) F(di_uid, .flags = LGFS2_MFF_UID) F(di_gid, .flags = LGFS2_MFF_GID) F(di_nlink) F(di_size, .flags = LGFS2_MFF_BYTES) F(di_blocks, .flags = LGFS2_MFF_FSBLOCKS) F(di_atime, .flags = LGFS2_MFF_SECS) F(di_mtime, .flags = LGFS2_MFF_SECS) F(di_ctime, .flags = LGFS2_MFF_SECS) F(di_major, .flags = LGFS2_MFF_MAJOR) F(di_minor, .flags = LGFS2_MFF_MINOR) FP(di_rgrp, .points_to = LGFS2_MT_GFS_RGRP) FP(di_goal_rgrp, .points_to = LGFS2_MT_GFS_RGRP) F(di_goal_dblk) F(di_goal_mblk) F(di_flags, .flags = LGFS2_MFF_MASK, .symtab=lgfs2_di_flags, .nsyms=ARRAY_SIZE(lgfs2_di_flags)) F(di_payload_format) F(di_type) F(di_height) F(di_incarn) F(di_pad) F(di_depth) F(di_entries) INR(di_next_unused, .points_to = (1 << LGFS2_MT_GFS_DINODE)) FP(di_eattr, .points_to = (1 << 
LGFS2_MT_EA_ATTR)|(1 << LGFS2_MT_GFS_INDIRECT)) F(di_reserved) }; #undef STRUCT struct gfs2_indirect { struct gfs2_meta_header in_header; }; #define STRUCT gfs2_indirect static const struct lgfs2_metafield gfs2_indirect_fields[] = { MH(in_header) }; #undef STRUCT #define STRUCT gfs_indirect static const struct lgfs2_metafield gfs_indirect_fields[] = { MH(in_header) RF(in_reserved) }; #undef STRUCT #define STRUCT gfs2_leaf static const struct lgfs2_metafield gfs2_leaf_fields[] = { MH(lf_header) F(lf_depth) F(lf_entries) F(lf_dirent_format) F(lf_next) RF(lf_reserved) }; #undef STRUCT struct gfs2_jrnl_data { struct gfs2_meta_header jd_header; }; #define STRUCT gfs2_jrnl_data static const struct lgfs2_metafield gfs2_jdata_fields[] = { MH(jd_header) }; #undef STRUCT #define STRUCT gfs2_log_header static const struct lgfs2_metafield gfs2_log_header_fields[] = { MH(lh_header) F(lh_sequence) F(lh_flags) F(lh_tail) F(lh_blkno) F(lh_hash, .flags = LGFS2_MFF_CHECK) }; #undef STRUCT #define STRUCT gfs_log_header static const struct lgfs2_metafield gfs_log_header_fields[] = { MH(lh_header) F(lh_flags, .flags = LGFS2_MFF_MASK, .symtab = lgfs2_lh_flags, .nsyms = ARRAY_SIZE(lgfs2_lh_flags)) RF(lh_pad) F(lh_first) F(lh_sequence) F(lh_tail) F(lh_last_dump) RF(lh_reserved) }; #undef STRUCT #define STRUCT gfs2_log_descriptor static const struct lgfs2_metafield gfs2_log_desc_fields[] = { MH(ld_header) F(ld_type, .flags = LGFS2_MFF_ENUM, .symtab = lgfs2_ld_types, .nsyms = ARRAY_SIZE(lgfs2_ld_types)) F(ld_length, .flags = LGFS2_MFF_FSBLOCKS) F(ld_data1) F(ld_data2) RF(ld_reserved) }; #undef STRUCT #define STRUCT gfs_log_descriptor static const struct lgfs2_metafield gfs_log_desc_fields[] = { MH(ld_header) F(ld_type, .flags = LGFS2_MFF_ENUM, .symtab = lgfs2_ld1_types, .nsyms = ARRAY_SIZE(lgfs2_ld1_types)) F(ld_length, .flags = LGFS2_MFF_FSBLOCKS) F(ld_data1) F(ld_data2) RF(ld_reserved) }; #undef STRUCT struct gfs2_log_block { struct gfs2_meta_header lb_header; }; #define STRUCT 
gfs2_log_block static const struct lgfs2_metafield gfs2_log_block_fields[] = { MH(lb_header) }; #undef STRUCT struct gfs2_ea_attr { struct gfs2_meta_header ea_header; }; #define STRUCT gfs2_ea_attr static const struct lgfs2_metafield gfs2_ea_attr_fields[] = { MH(ea_header) }; #undef STRUCT struct gfs2_ea_data { struct gfs2_meta_header ed_header; }; #define STRUCT gfs2_ea_data static const struct lgfs2_metafield gfs2_ea_data_fields[] = { MH(ed_header) }; #undef STRUCT #define STRUCT gfs2_quota_change static const struct lgfs2_metafield gfs2_quota_change_fields[] = { F(qc_change, .flags = LGFS2_MFF_FSBLOCKS) F(qc_flags) F(qc_id) }; #undef STRUCT #define STRUCT gfs2_dirent static const struct lgfs2_metafield gfs2_dirent_fields[] = { IN(de_inum, .points_to = (1 << LGFS2_MT_GFS_DINODE)|(1 << LGFS2_MT_GFS2_DINODE)) F(de_hash, .flags = LGFS2_MFF_CHECK) F(de_rec_len, .flags = LGFS2_MFF_BYTES) F(de_name_len, .flags = LGFS2_MFF_BYTES) F(de_type) RF(__pad) }; #undef STRUCT #define STRUCT gfs2_ea_header static const struct lgfs2_metafield gfs2_ea_header_fields[] = { F(ea_rec_len, .flags = LGFS2_MFF_BYTES) F(ea_data_len, .flags = LGFS2_MFF_BYTES) F(ea_name_len, .flags = LGFS2_MFF_BYTES) F(ea_type) F(ea_flags) F(ea_num_ptrs) RF(__pad) }; #undef STRUCT #define STRUCT gfs2_inum_range static const struct lgfs2_metafield gfs2_inum_range_fields[] = { F(ir_start) F(ir_length) }; #undef STRUCT #define STRUCT gfs2_statfs_change static const struct lgfs2_metafield gfs2_statfs_change_fields[] = { F(sc_total, .flags = LGFS2_MFF_FSBLOCKS) F(sc_free, .flags = LGFS2_MFF_FSBLOCKS) F(sc_dinodes, .flags = LGFS2_MFF_FSBLOCKS) }; #undef STRUCT #define STRUCT gfs_jindex static const struct lgfs2_metafield gfs_jindex_fields[] = { FP(ji_addr, .points_to = (1 << LGFS2_MT_DATA)) F(ji_nsegment) RF(ji_pad) RF(ji_reserved) }; #undef STRUCT struct gfs_block_tag { uint64_t bt_blkno; /* inplace block number */ uint32_t bt_flags; /* ?? 
*/ uint32_t bt_pad; }; #define STRUCT gfs_block_tag static const struct lgfs2_metafield gfs_block_tag_fields[] = { FP(bt_blkno, .points_to = ANY_GFS_BLOCK) RF(bt_flags) RF(bt_pad) }; const struct lgfs2_metadata lgfs2_metadata[] = { [LGFS2_MT_GFS2_SB] = { .versions = LGFS2_MD_GFS2, .header = 1, .mh_type = GFS2_METATYPE_SB, .mh_format = GFS2_FORMAT_SB, .name = "gfs2_sb", .fields = gfs2_sb_fields, .nfields = ARRAY_SIZE(gfs2_sb_fields), .size = sizeof(struct gfs2_sb), }, [LGFS2_MT_GFS_SB] = { .versions = LGFS2_MD_GFS1, .header = 1, .mh_type = GFS2_METATYPE_SB, .mh_format = GFS_FORMAT_SB, .name = "gfs_sb", .fields = gfs_sb_fields, .nfields = ARRAY_SIZE(gfs_sb_fields), .size = sizeof(struct gfs_sb), }, [LGFS2_MT_RINDEX] = { .versions = LGFS2_MD_GFS1 | LGFS2_MD_GFS2, .name = "rindex", .fields = gfs2_rindex_fields, .nfields = ARRAY_SIZE(gfs2_rindex_fields), .size = sizeof(struct gfs2_rindex), }, [LGFS2_MT_GFS2_RGRP] = { .versions = LGFS2_MD_GFS2, .header = 1, .mh_type = GFS2_METATYPE_RG, .mh_format = GFS2_FORMAT_RG, .name = "gfs2_rgrp", .fields = gfs2_rgrp_fields, .nfields = ARRAY_SIZE(gfs2_rgrp_fields), .size = sizeof(struct gfs2_rgrp), }, [LGFS2_MT_GFS_RGRP] = { .versions = LGFS2_MD_GFS1, .header = 1, .mh_type = GFS2_METATYPE_RG, .mh_format = GFS2_FORMAT_RG, .name = "gfs_rgrp", .fields = gfs_rgrp_fields, .nfields = ARRAY_SIZE(gfs_rgrp_fields), .size = sizeof(struct gfs_rgrp), }, [LGFS2_MT_RGRP_BITMAP] = { .versions = LGFS2_MD_GFS1 | LGFS2_MD_GFS2, .header = 1, .mh_type = GFS2_METATYPE_RB, .mh_format = GFS2_FORMAT_RB, .name = "gfs2_rgrp_bitmap", .fields = gfs2_rgrp_bitmap_fields, .nfields = ARRAY_SIZE(gfs2_rgrp_bitmap_fields), .size = sizeof(struct gfs2_meta_header), }, [LGFS2_MT_GFS2_DINODE] = { .versions = LGFS2_MD_GFS2, .header = 1, .mh_type = GFS2_METATYPE_DI, .mh_format = GFS2_FORMAT_DI, .name = "gfs2_dinode", .fields = gfs2_dinode_fields, .nfields = ARRAY_SIZE(gfs2_dinode_fields), .size = sizeof(struct gfs2_dinode), }, [LGFS2_MT_GFS_DINODE] = { .versions = 
LGFS2_MD_GFS1, .header = 1, .mh_type = GFS2_METATYPE_DI, .mh_format = GFS2_FORMAT_DI, .name = "gfs_dinode", .fields = gfs_dinode_fields, .nfields = ARRAY_SIZE(gfs_dinode_fields), .size = sizeof(struct gfs_dinode), }, [LGFS2_MT_GFS2_INDIRECT] = { .versions = LGFS2_MD_GFS2, .header = 1, .mh_type = GFS2_METATYPE_IN, .mh_format = GFS2_FORMAT_IN, .name = "gfs2_indirect", .fields = gfs2_indirect_fields, .nfields = ARRAY_SIZE(gfs2_indirect_fields), .size = sizeof(struct gfs2_meta_header), }, [LGFS2_MT_GFS_INDIRECT] = { .versions = LGFS2_MD_GFS1, .header = 1, .mh_type = GFS2_METATYPE_IN, .mh_format = GFS2_FORMAT_IN, .name = "gfs_indirect", .fields = gfs_indirect_fields, .nfields = ARRAY_SIZE(gfs_indirect_fields), .size = sizeof(struct gfs_indirect), }, [LGFS2_MT_DIR_LEAF] = { .versions = LGFS2_MD_GFS1 | LGFS2_MD_GFS2, .header = 1, .mh_type = GFS2_METATYPE_LF, .mh_format = GFS2_FORMAT_LF, .name = "gfs2_leaf", .fields = gfs2_leaf_fields, .nfields = ARRAY_SIZE(gfs2_leaf_fields), .size = sizeof(struct gfs2_leaf), }, [LGFS2_MT_JRNL_DATA] = { .versions = LGFS2_MD_GFS1 | LGFS2_MD_GFS2, .header = 1, .mh_type = GFS2_METATYPE_JD, .mh_format = GFS2_FORMAT_JD, .name = "gfs2_jdata", .fields = gfs2_jdata_fields, .nfields = ARRAY_SIZE(gfs2_jdata_fields), .size = sizeof(struct gfs2_meta_header), }, [LGFS2_MT_GFS2_LOG_HEADER] = { .versions = LGFS2_MD_GFS2, .header = 1, .mh_type = GFS2_METATYPE_LH, .mh_format = GFS2_FORMAT_LH, .name = "gfs2_log_header", .fields = gfs2_log_header_fields, .nfields = ARRAY_SIZE(gfs2_log_header_fields), .size = sizeof(struct gfs2_log_header), }, [LGFS2_MT_GFS_LOG_HEADER] = { .versions = LGFS2_MD_GFS1, .header = 1, .mh_type = GFS2_METATYPE_LH, .mh_format = GFS2_FORMAT_LH, .name = "gfs_log_header", .fields = gfs_log_header_fields, .nfields = ARRAY_SIZE(gfs_log_header_fields), .size = sizeof(struct gfs_log_header), }, [LGFS2_MT_GFS2_LOG_DESC] = { .versions = LGFS2_MD_GFS2, .header = 1, .mh_type = GFS2_METATYPE_LD, .mh_format = GFS2_FORMAT_LD, .name = 
"gfs2_log_desc", .fields = gfs2_log_desc_fields, .nfields = ARRAY_SIZE(gfs2_log_desc_fields), .size = sizeof(struct gfs2_log_descriptor), }, [LGFS2_MT_GFS_LOG_DESC] = { .versions = LGFS2_MD_GFS1, .header = 1, .mh_type = GFS2_METATYPE_LD, .mh_format = GFS2_FORMAT_LD, .name = "gfs_log_desc", .fields = gfs_log_desc_fields, .nfields = ARRAY_SIZE(gfs_log_desc_fields), .size = sizeof(struct gfs_log_descriptor), }, [LGFS2_MT_GFS2_LOG_BLOCK] = { .versions = LGFS2_MD_GFS2, .header = 1, .mh_type = GFS2_METATYPE_LB, .mh_format = GFS2_FORMAT_LB, .name = "gfs2_log_block", .fields = gfs2_log_block_fields, .nfields = ARRAY_SIZE(gfs2_log_block_fields), .size = sizeof(struct gfs2_meta_header), }, [LGFS2_MT_EA_ATTR] = { .versions = LGFS2_MD_GFS1 | LGFS2_MD_GFS2, .header = 1, .mh_type = GFS2_METATYPE_EA, .mh_format = GFS2_FORMAT_EA, .name = "gfs2_ea_attr", .fields = gfs2_ea_attr_fields, .nfields = ARRAY_SIZE(gfs2_ea_attr_fields), .size = sizeof(struct gfs2_meta_header), }, [LGFS2_MT_EA_DATA] = { .versions = LGFS2_MD_GFS1 | LGFS2_MD_GFS2, .header = 1, .mh_type = GFS2_METATYPE_ED, .mh_format = GFS2_FORMAT_ED, .name = "gfs2_ea_data", .fields = gfs2_ea_data_fields, .nfields = ARRAY_SIZE(gfs2_ea_data_fields), .size = sizeof(struct gfs2_meta_header), }, [LGFS2_MT_GFS2_QUOTA_CHANGE] = { .versions = LGFS2_MD_GFS2, .name = "gfs2_quota_change", .fields = gfs2_quota_change_fields, .nfields = ARRAY_SIZE(gfs2_quota_change_fields), .size = sizeof(struct gfs2_quota_change), }, [LGFS2_MT_DIRENT] = { .versions = LGFS2_MD_GFS1 | LGFS2_MD_GFS2, .name = "gfs2_dirent", .fields = gfs2_dirent_fields, .nfields = ARRAY_SIZE(gfs2_dirent_fields), .size = sizeof(struct gfs2_dirent), }, [LGFS2_MT_EA_HEADER] = { .versions = LGFS2_MD_GFS1 | LGFS2_MD_GFS2, .name = "gfs2_ea_header", .fields = gfs2_ea_header_fields, .nfields = ARRAY_SIZE(gfs2_ea_header_fields), .size = sizeof(struct gfs2_ea_header), }, [LGFS2_MT_GFS2_INUM_RANGE] = { .versions = LGFS2_MD_GFS2, .name = "gfs2_inum_range", .fields = 
gfs2_inum_range_fields, .nfields = ARRAY_SIZE(gfs2_inum_range_fields), .size = sizeof(struct gfs2_inum_range), }, [LGFS2_MT_STATFS_CHANGE] = { .versions = LGFS2_MD_GFS1 | LGFS2_MD_GFS2, .name = "gfs2_statfs_change", .fields = gfs2_statfs_change_fields, .nfields = ARRAY_SIZE(gfs2_statfs_change_fields), .size = sizeof(struct gfs2_statfs_change), }, [LGFS2_MT_GFS_JINDEX] = { .versions = LGFS2_MD_GFS1, .name = "gfs_jindex", .fields = gfs_jindex_fields, .nfields = ARRAY_SIZE(gfs_jindex_fields), .size = sizeof(struct gfs_jindex), }, [LGFS2_MT_GFS_BLOCK_TAG] = { .versions = LGFS2_MD_GFS1, .name = "gfs_block_tag", .fields = gfs_block_tag_fields, .nfields = ARRAY_SIZE(gfs_block_tag_fields), .size = sizeof(struct gfs_block_tag), }, [LGFS2_MT_DATA] = { .versions = LGFS2_MD_GFS1 | LGFS2_MD_GFS2, .name = "data", }, [LGFS2_MT_FREE] = { .versions = LGFS2_MD_GFS1 | LGFS2_MD_GFS2, .name = "free", }, }; const unsigned lgfs2_metadata_size = ARRAY_SIZE(lgfs2_metadata); static int check_metadata_sizes(void) { unsigned offset; int i, j; int ret = 0; for (i = 0; i < lgfs2_metadata_size; i++) { const struct lgfs2_metadata *m = &lgfs2_metadata[i]; offset = 0; for (j = 0; j < m->nfields; j++) { const struct lgfs2_metafield *f = &m->fields[j]; if (f->offset != offset) { fprintf(stderr, "%s: %s: offset is %u, expected %u\n", m->name, f->name, f->offset, offset); ret = -1; } offset += f->length; } if (offset != m->size) { fprintf(stderr, "%s: size mismatch between struct %u and fields %u\n", m->name, m->size, offset); ret = -1; } } return ret; } static int check_symtab(void) { int i, j; int ret = 0; for (i = 0; i < lgfs2_metadata_size; i++) { const struct lgfs2_metadata *m = &lgfs2_metadata[i]; for (j = 0; j < m->nfields; j++) { const struct lgfs2_metafield *f = &m->fields[j]; if (f->flags & (LGFS2_MFF_MASK|LGFS2_MFF_ENUM)) { if (f->symtab == NULL) { fprintf(stderr, "%s: Missing symtab for %s\n", m->name, f->name); ret = -1; } } if (f->symtab) { if (!(f->flags & 
(LGFS2_MFF_MASK|LGFS2_MFF_ENUM))) { fprintf(stderr, "%s: Symtab for non-enum and non-mask field %s\n", m->name, f->name); ret = -1; } } } } return ret; } static int check_ptrs(void) { int i, j; int ret = 0; for (i = 0; i < lgfs2_metadata_size; i++) { const struct lgfs2_metadata *m = &lgfs2_metadata[i]; for (j = 0; j < m->nfields; j++) { const struct lgfs2_metafield *f = &m->fields[j]; if ((f->flags & LGFS2_MFF_POINTER) && !f->points_to) { fprintf(stderr, "%s: Pointer entry %s has no destination\n", m->name, f->name); ret = -1; } } } return ret; } int lgfs2_selfcheck(void) { int ret = 0; ret |= check_metadata_sizes(); ret |= check_symtab(); ret |= check_ptrs(); return ret; } const struct lgfs2_metadata *lgfs2_find_mtype(uint32_t mh_type, const unsigned versions) { const struct lgfs2_metadata *m = lgfs2_metadata; unsigned n = 0; do { if ((m[n].versions & versions) && m[n].mh_type == mh_type) return &m[n]; n++; } while (n < lgfs2_metadata_size); return NULL; } const struct lgfs2_metadata *lgfs2_find_mtype_name(const char *name, const unsigned versions) { const struct lgfs2_metadata *m = lgfs2_metadata; unsigned n = 0; do { if ((m[n].versions & versions) && !strcmp(m[n].name, name)) return &m[n]; n++; } while (n < lgfs2_metadata_size); return NULL; } gfs2-utils/gfs2/libgfs2/misc.c0000664000175000017500000001633712135216402015025 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libgfs2.h" #define PAGE_SIZE (4096) #define SYS_BASE "/sys/fs/gfs2" /* FIXME: Look in /proc/mounts to find this */ #define DIV_RU(x, y) (((x) + (y) - 1) / (y)) int compute_heightsize(struct gfs2_sbd *sdp, uint64_t *heightsize, uint32_t *maxheight, uint32_t bsize1, int diptrs, int inptrs) { heightsize[0] = sdp->bsize - sizeof(struct gfs2_dinode); heightsize[1] = bsize1 * diptrs; for (*maxheight = 2;; (*maxheight)++) { uint64_t 
space, d; uint32_t m; space = heightsize[*maxheight - 1] * inptrs; m = space % inptrs; d = space / inptrs; if (d != heightsize[*maxheight - 1] || m) break; heightsize[*maxheight] = space; } if (*maxheight > GFS2_MAX_META_HEIGHT) { errno = EINVAL; return -1; } return 0; } int compute_constants(struct gfs2_sbd *sdp) { uint32_t hash_blocks, ind_blocks, leaf_blocks; uint32_t tmp_blocks; sdp->md.next_inum = 1; sdp->sd_sb.sb_bsize_shift = ffs(sdp->bsize) - 1; sdp->sb_addr = GFS2_SB_ADDR * GFS2_BASIC_BLOCK / sdp->bsize; sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT; sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; sdp->sd_diptrs = (sdp->bsize - sizeof(struct gfs2_dinode)) / sizeof(uint64_t); sdp->sd_inptrs = (sdp->bsize - sizeof(struct gfs2_meta_header)) / sizeof(uint64_t); sdp->sd_jbsize = sdp->bsize - sizeof(struct gfs2_meta_header); sdp->sd_hash_bsize = sdp->bsize / 2; sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1; sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(uint64_t); sdp->sd_blocks_per_bitmap = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) * GFS2_NBBY; /* Compute maximum reservation required to add a entry to a directory */ hash_blocks = DIV_RU(sizeof(uint64_t) * (1 << GFS2_DIR_MAX_DEPTH), sdp->sd_jbsize); ind_blocks = 0; for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) { tmp_blocks = DIV_RU(tmp_blocks, sdp->sd_inptrs); ind_blocks += tmp_blocks; } leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH; sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks; if (compute_heightsize(sdp, sdp->sd_heightsize, &sdp->sd_max_height, sdp->bsize, sdp->sd_diptrs, sdp->sd_inptrs)) { return -1; } if (compute_heightsize(sdp, sdp->sd_jheightsize, &sdp->sd_max_jheight, sdp->sd_jbsize, sdp->sd_diptrs, sdp->sd_inptrs)) { return -1; } return 0; } int is_pathname_mounted(struct gfs2_sbd *sdp, int *ro_mount) { FILE *fp; struct mntent *mnt; dev_t file_dev=0, file_rdev=0; ino_t file_ino=0; struct stat st_buf; *ro_mount = 0; if ((fp = 
setmntent("/proc/mounts", "r")) == NULL) { perror("open: /proc/mounts"); return 0; } if (stat(sdp->path_name, &st_buf) == 0) { if (S_ISBLK(st_buf.st_mode)) { #ifndef __GNU__ /* The GNU hurd is broken with respect to stat devices */ file_rdev = st_buf.st_rdev; #endif /* __GNU__ */ } else { file_dev = st_buf.st_dev; file_ino = st_buf.st_ino; } } while ((mnt = getmntent (fp)) != NULL) { /* Check if they specified the device instead of mnt point */ if (strcmp(sdp->device_name, mnt->mnt_fsname) == 0) { strcpy(sdp->path_name, mnt->mnt_dir); /* fix it */ break; } if (strcmp(sdp->path_name, mnt->mnt_dir) == 0) { strcpy(sdp->device_name, mnt->mnt_fsname); /* fix it */ break; } if (stat(mnt->mnt_fsname, &st_buf) == 0) { if (S_ISBLK(st_buf.st_mode)) { #ifndef __GNU__ if (file_rdev && (file_rdev == st_buf.st_rdev)) break; #endif /* __GNU__ */ } else { if (file_dev && ((file_dev == st_buf.st_dev) && (file_ino == st_buf.st_ino))) break; } } } endmntent (fp); if (mnt == NULL) return 0; if (stat(mnt->mnt_dir, &st_buf) < 0) { if (errno == ENOENT) return 0; } /* Can't trust fstype because / has "rootfs". 
*/ if (file_rdev && (st_buf.st_dev != file_rdev)) return 0; if (hasmntopt(mnt, MNTOPT_RO)) *ro_mount = 1; return 1; /* mounted */ } int is_gfs2(struct gfs2_sbd *sdp) { int fd, rc; struct gfs2_sb sb; fd = open(sdp->device_name, O_RDWR); if (fd < 0) return 0; rc = 0; if (lseek(fd, GFS2_SB_ADDR * GFS2_BASIC_BLOCK, SEEK_SET) >= 0 && read(fd, &sb, sizeof(sb)) == sizeof(sb) && be32_to_cpu(sb.sb_header.mh_magic) == GFS2_MAGIC && be32_to_cpu(sb.sb_header.mh_type) == GFS2_METATYPE_SB) rc = 1; close(fd); return rc; } int check_for_gfs2(struct gfs2_sbd *sdp) { int ro; if (!is_pathname_mounted(sdp, &ro)) return -1; if (!is_gfs2(sdp)) return -1; return 0; } static int lock_for_admin(struct gfs2_sbd *sdp) { int error; if (sdp->debug) printf("\nTrying to get admin lock...\n"); sdp->metafs_fd = open(sdp->metafs_path, O_RDONLY | O_NOFOLLOW); if (sdp->metafs_fd < 0) return -1; error = flock(sdp->metafs_fd, LOCK_EX); if (error) return -1; if (sdp->debug) printf("Got it.\n"); return 0; } int mount_gfs2_meta(struct gfs2_sbd *sdp) { int ret; memset(sdp->metafs_path, 0, PATH_MAX); snprintf(sdp->metafs_path, PATH_MAX - 1, "/tmp/.gfs2meta.XXXXXX"); if(!mkdtemp(sdp->metafs_path)) return -1; ret = mount(sdp->path_name, sdp->metafs_path, "gfs2meta", 0, NULL); if (ret) { rmdir(sdp->metafs_path); return -1; } if (lock_for_admin(sdp)) return -1; return 0; } void cleanup_metafs(struct gfs2_sbd *sdp) { int ret; if (sdp->metafs_fd <= 0) return; fsync(sdp->metafs_fd); close(sdp->metafs_fd); ret = umount(sdp->metafs_path); if (ret) fprintf(stderr, "Couldn't unmount %s : %s\n", sdp->metafs_path, strerror(errno)); else rmdir(sdp->metafs_path); } int set_sysfs(const char *fsname, const char *filename, const char *val) { char path[PATH_MAX]; int fd, rv, len; len = strlen(val) + 1; if (len > PAGE_SIZE) { errno = EINVAL; return -1; } memset(path, 0, PATH_MAX); snprintf(path, PATH_MAX - 1, "%s/%s/%s", SYS_BASE, fsname, filename); fd = open(path, O_WRONLY); if (fd < 0) return -1; rv = write(fd, val, len); if 
(rv != len) { close(fd); return -1; } close(fd); return 0; } /* * get_random_bytes - Generate a series of random bytes using /dev/urandom. * * Modified from original code in gen_uuid.c in e2fsprogs/lib */ void get_random_bytes(void *buf, int nbytes) { int i, n = nbytes, fd; int lose_counter = 0; unsigned char *cp = (unsigned char *) buf; struct timeval tv; gettimeofday(&tv, 0); fd = open("/dev/urandom", O_RDONLY | O_CLOEXEC); srand((getpid() << 16) ^ getuid() ^ tv.tv_sec ^ tv.tv_usec); /* Crank the random number generator a few times */ gettimeofday(&tv, 0); for (i = (tv.tv_sec ^ tv.tv_usec) & 0x1F; i > 0; i--) rand(); if (fd >= 0) { while (n > 0) { i = read(fd, cp, n); if (i <= 0) { if (lose_counter++ > 16) break; continue; } n -= i; cp += i; lose_counter = 0; } close(fd); } /* * We do this all the time, but this is the only source of * randomness if /dev/random/urandom is out to lunch. */ for (cp = buf, i = 0; i < nbytes; i++) *cp++ ^= (rand() >> 7) & 0xFF; return; } gfs2-utils/gfs2/libgfs2/ondisk.c0000664000175000017500000003706112111707433015361 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include #include "libgfs2.h" #define pv(struct, member, fmt, fmt2) do { \ print_it(" "#member, fmt, fmt2, struct->member); \ } while (FALSE); #define pv2(struct, member, fmt, fmt2) do { \ print_it(" ", fmt, fmt2, struct->member); \ } while (FALSE); #define CPIN_08(s1, s2, member, count) {memcpy((s1->member), (s2->member), (count));} #define CPOUT_08(s1, s2, member, count) {memcpy((s2->member), (s1->member), (count));} #define CPIN_16(s1, s2, member) {(s1->member) = be16_to_cpu((s2->member));} #define CPOUT_16(s1, s2, member) {(s2->member) = cpu_to_be16((s1->member));} #define CPIN_32(s1, s2, member) {(s1->member) = be32_to_cpu((s2->member));} #define CPOUT_32(s1, s2, member) {(s2->member) = cpu_to_be32((s1->member));} #define CPIN_64(s1, s2, member) {(s1->member) = be64_to_cpu((s2->member));} #define CPOUT_64(s1, s2, 
member) {(s2->member) = cpu_to_be64((s1->member));} /* * gfs2_xxx_in - read in an xxx struct * first arg: the cpu-order structure * bh: the disk-order buffer_head * * gfs2_xxx_out - write out an xxx struct * first arg: the cpu-order structure * bh: the disk-order buffer_head * * gfs2_xxx_print - print out an xxx struct * first arg: the cpu-order structure */ void gfs2_inum_in(struct gfs2_inum *no, char *buf) { struct gfs2_inum *str = (struct gfs2_inum *)buf; CPIN_64(no, str, no_formal_ino); CPIN_64(no, str, no_addr); } void gfs2_inum_out(struct gfs2_inum *no, char *buf) { struct gfs2_inum *str = (struct gfs2_inum *)buf; CPOUT_64(no, str, no_formal_ino); CPOUT_64(no, str, no_addr); } void gfs2_inum_print(struct gfs2_inum *no) { pv(no, no_formal_ino, "%llu", "0x%llx"); pv(no, no_addr, "%llu", "0x%llx"); } void gfs2_meta_header_in(struct gfs2_meta_header *mh, struct gfs2_buffer_head *bh) { struct gfs2_meta_header *str = (struct gfs2_meta_header *)bh->b_data; CPIN_32(mh, str, mh_magic); CPIN_32(mh, str, mh_type); CPIN_32(mh, str, mh_format); } void gfs2_meta_header_out(const struct gfs2_meta_header *mh, char *buf) { struct gfs2_meta_header *str = (struct gfs2_meta_header *)buf; CPOUT_32(mh, str, mh_magic); CPOUT_32(mh, str, mh_type); CPOUT_32(mh, str, mh_format); str->__pad0 = 0; str->__pad1 = 0; } void gfs2_meta_header_out_bh(const struct gfs2_meta_header *mh, struct gfs2_buffer_head *bh) { gfs2_meta_header_out(mh, bh->iov.iov_base); bmodified(bh); } void gfs2_meta_header_print(struct gfs2_meta_header *mh) { pv(mh, mh_magic, "0x%08X", NULL); pv(mh, mh_type, "%u", "0x%x"); pv(mh, mh_format, "%u", "0x%x"); } void gfs2_sb_in(struct gfs2_sb *sb, struct gfs2_buffer_head *bh) { struct gfs2_sb *str = (struct gfs2_sb *)bh->b_data; gfs2_meta_header_in(&sb->sb_header, bh); CPIN_32(sb, str, sb_fs_format); CPIN_32(sb, str, sb_multihost_format); CPIN_32(sb, str, __pad0); /* gfs sb_flags */ CPIN_32(sb, str, sb_bsize); CPIN_32(sb, str, sb_bsize_shift); CPIN_32(sb, str, __pad1); /* 
gfs sb_seg_size */ gfs2_inum_in(&sb->sb_master_dir, (char *)&str->sb_master_dir); gfs2_inum_in(&sb->sb_root_dir, (char *)&str->sb_root_dir); CPIN_08(sb, str, sb_lockproto, GFS2_LOCKNAME_LEN); CPIN_08(sb, str, sb_locktable, GFS2_LOCKNAME_LEN); gfs2_inum_in(&sb->__pad2, (char *)&str->__pad2); /* gfs rindex */ gfs2_inum_in(&sb->__pad3, (char *)&str->__pad3); /* gfs quota */ gfs2_inum_in(&sb->__pad4, (char *)&str->__pad4); /* gfs license */ #ifdef GFS2_HAS_UUID CPIN_08(sb, str, sb_uuid, sizeof(sb->sb_uuid)); #endif } void gfs2_sb_out(struct gfs2_sb *sb, struct gfs2_buffer_head *bh) { struct gfs2_sb *str = (struct gfs2_sb *)bh->b_data; gfs2_meta_header_out_bh(&sb->sb_header, bh); CPOUT_32(sb, str, sb_fs_format); CPOUT_32(sb, str, sb_multihost_format); CPOUT_32(sb, str, sb_bsize); CPOUT_32(sb, str, sb_bsize_shift); gfs2_inum_out(&sb->sb_master_dir, (char *)&str->sb_master_dir); gfs2_inum_out(&sb->sb_root_dir, (char *)&str->sb_root_dir); CPOUT_08(sb, str, sb_lockproto, GFS2_LOCKNAME_LEN); CPOUT_08(sb, str, sb_locktable, GFS2_LOCKNAME_LEN); #ifdef GFS2_HAS_UUID memcpy(str->sb_uuid, sb->sb_uuid, 16); #endif bmodified(bh); } const char *str_uuid(const unsigned char *uuid) { static char str[64]; char *ch; int i; memset(str, 0, sizeof(str)); ch = str; for (i = 0; i < 16; i++) { sprintf(ch, "%02x", uuid[i]); ch += 2; if ((i == 3) || (i == 5) || (i == 7) || (i == 9)) { *ch = '-'; ch++; } } return str; } #ifdef GFS2_HAS_UUID void gfs2_print_uuid(const unsigned char *uuid) { print_it(" uuid", "%s", NULL, str_uuid(uuid)); } #endif void gfs2_sb_print(struct gfs2_sb *sb) { gfs2_meta_header_print(&sb->sb_header); pv(sb, sb_fs_format, "%u", "0x%x"); pv(sb, sb_multihost_format, "%u", "0x%x"); pv(sb, sb_bsize, "%u", "0x%x"); pv(sb, sb_bsize_shift, "%u", "0x%x"); gfs2_inum_print(&sb->sb_master_dir); gfs2_inum_print(&sb->sb_root_dir); pv(sb, sb_lockproto, "%s", NULL); pv(sb, sb_locktable, "%s", NULL); #ifdef GFS2_HAS_UUID gfs2_print_uuid(sb->sb_uuid); #endif } void gfs2_rindex_in(struct 
gfs2_rindex *ri, char *buf) { struct gfs2_rindex *str = (struct gfs2_rindex *)buf; CPIN_64(ri, str, ri_addr); CPIN_32(ri, str, ri_length); CPIN_64(ri, str, ri_data0); CPIN_32(ri, str, ri_data); CPIN_32(ri, str, ri_bitbytes); CPIN_08(ri, str, ri_reserved, 64); } void gfs2_rindex_out(struct gfs2_rindex *ri, char *buf) { struct gfs2_rindex *str = (struct gfs2_rindex *)buf; CPOUT_64(ri, str, ri_addr); CPOUT_32(ri, str, ri_length); ri->__pad = 0; CPOUT_64(ri, str, ri_data0); CPOUT_32(ri, str, ri_data); CPOUT_32(ri, str, ri_bitbytes); CPOUT_08(ri, str, ri_reserved, 64); } void gfs2_rindex_print(struct gfs2_rindex *ri) { pv(ri, ri_addr, "%llu", "0x%llx"); pv(ri, ri_length, "%u", "0x%x"); pv(ri, ri_data0, "%llu", "0x%llx"); pv(ri, ri_data, "%u", "0x%x"); pv(ri, ri_bitbytes, "%u", "0x%x"); } void gfs2_rgrp_in(struct gfs2_rgrp *rg, struct gfs2_buffer_head *bh) { struct gfs2_rgrp *str = (struct gfs2_rgrp *)bh->b_data; gfs2_meta_header_in(&rg->rg_header, bh); CPIN_32(rg, str, rg_flags); CPIN_32(rg, str, rg_free); CPIN_32(rg, str, rg_dinodes); CPIN_08(rg, str, rg_reserved, 80); } void gfs2_rgrp_out(const struct gfs2_rgrp *rg, char *buf) { struct gfs2_rgrp *str = (struct gfs2_rgrp *)buf; gfs2_meta_header_out(&rg->rg_header, buf); CPOUT_32(rg, str, rg_flags); CPOUT_32(rg, str, rg_free); CPOUT_32(rg, str, rg_dinodes); CPOUT_08(rg, str, rg_reserved, 80); } void gfs2_rgrp_out_bh(const struct gfs2_rgrp *rg, struct gfs2_buffer_head *bh) { gfs2_rgrp_out(rg, bh->iov.iov_base); bmodified(bh); } void gfs2_rgrp_print(struct gfs2_rgrp *rg) { gfs2_meta_header_print(&rg->rg_header); pv(rg, rg_flags, "%u", "0x%x"); pv(rg, rg_free, "%u", "0x%x"); pv(rg, rg_dinodes, "%u", "0x%x"); } void gfs2_quota_in(struct gfs2_quota *qu, char *buf) { struct gfs2_quota *str = (struct gfs2_quota *)buf; CPIN_64(qu, str, qu_limit); CPIN_64(qu, str, qu_warn); CPIN_64(qu, str, qu_value); CPIN_08(qu, str, qu_reserved, sizeof(qu->qu_reserved)); } void gfs2_quota_out(struct gfs2_quota *qu, char *buf) { struct 
gfs2_quota *str = (struct gfs2_quota *)buf; CPOUT_64(qu, str, qu_limit); CPOUT_64(qu, str, qu_warn); CPOUT_64(qu, str, qu_value); memset(qu->qu_reserved, 0, sizeof(qu->qu_reserved)); } void gfs2_quota_print(struct gfs2_quota *qu) { pv(qu, qu_limit, "%llu", "0x%llx"); pv(qu, qu_warn, "%llu", "0x%llx"); pv(qu, qu_value, "%lld", "0x%llx"); } void gfs2_dinode_in(struct gfs2_dinode *di, struct gfs2_buffer_head *bh) { struct gfs2_dinode *str = (struct gfs2_dinode *)bh->b_data; gfs2_meta_header_in(&di->di_header, bh); gfs2_inum_in(&di->di_num, (char *)&str->di_num); CPIN_32(di, str, di_mode); CPIN_32(di, str, di_uid); CPIN_32(di, str, di_gid); CPIN_32(di, str, di_nlink); CPIN_64(di, str, di_size); CPIN_64(di, str, di_blocks); CPIN_64(di, str, di_atime); CPIN_64(di, str, di_mtime); CPIN_64(di, str, di_ctime); CPIN_32(di, str, di_major); CPIN_32(di, str, di_minor); CPIN_64(di, str, di_goal_meta); CPIN_64(di, str, di_goal_data); CPIN_32(di, str, di_flags); CPIN_32(di, str, di_payload_format); CPIN_16(di, str, __pad1); CPIN_16(di, str, di_height); CPIN_16(di, str, di_depth); CPIN_32(di, str, di_entries); CPIN_64(di, str, di_eattr); CPIN_08(di, str, di_reserved, 32); } void gfs2_dinode_out(struct gfs2_dinode *di, struct gfs2_buffer_head *bh) { struct gfs2_dinode *str = (struct gfs2_dinode *)bh->b_data; gfs2_meta_header_out_bh(&di->di_header, bh); gfs2_inum_out(&di->di_num, (char *)&str->di_num); CPOUT_32(di, str, di_mode); CPOUT_32(di, str, di_uid); CPOUT_32(di, str, di_gid); CPOUT_32(di, str, di_nlink); CPOUT_64(di, str, di_size); CPOUT_64(di, str, di_blocks); CPOUT_64(di, str, di_atime); CPOUT_64(di, str, di_mtime); CPOUT_64(di, str, di_ctime); CPOUT_32(di, str, di_major); CPOUT_32(di, str, di_minor); CPOUT_64(di, str, di_goal_meta); CPOUT_64(di, str, di_goal_data); CPOUT_32(di, str, di_flags); CPOUT_32(di, str, di_payload_format); CPOUT_16(di, str, __pad1); CPOUT_16(di, str, di_height); CPOUT_16(di, str, di_depth); CPOUT_32(di, str, di_entries); CPOUT_64(di, str, di_eattr); 
CPOUT_08(di, str, di_reserved, 32); bmodified(bh); } void gfs2_dinode_print(struct gfs2_dinode *di) { gfs2_meta_header_print(&di->di_header); gfs2_inum_print(&di->di_num); pv(di, di_mode, "0%o", NULL); pv(di, di_uid, "%u", "0x%x"); pv(di, di_gid, "%u", "0x%x"); pv(di, di_nlink, "%u", "0x%x"); pv(di, di_size, "%llu", "0x%llx"); pv(di, di_blocks, "%llu", "0x%llx"); pv(di, di_atime, "%lld", "0x%llx"); pv(di, di_mtime, "%lld", "0x%llx"); pv(di, di_ctime, "%lld", "0x%llx"); pv(di, di_major, "%u", "0x%llx"); pv(di, di_minor, "%u", "0x%llx"); pv(di, di_goal_meta, "%llu", "0x%llx"); pv(di, di_goal_data, "%llu", "0x%llx"); pv(di, di_flags, "0x%.8X", NULL); pv(di, di_payload_format, "%u", "0x%x"); pv(di, di_height, "%u", "0x%x"); pv(di, di_depth, "%u", "0x%x"); pv(di, di_entries, "%u", "0x%x"); pv(di, di_eattr, "%llu", "0x%llx"); } void gfs2_dirent_in(struct gfs2_dirent *de, char *buf) { struct gfs2_dirent *str = (struct gfs2_dirent *)buf; gfs2_inum_in(&de->de_inum, buf); CPIN_32(de, str, de_hash); CPIN_16(de, str, de_rec_len); CPIN_16(de, str, de_name_len); CPIN_16(de, str, de_type); } void gfs2_dirent_out(struct gfs2_dirent *de, char *buf) { struct gfs2_dirent *str = (struct gfs2_dirent *)buf; gfs2_inum_out(&de->de_inum, buf); CPOUT_32(de, str, de_hash); CPOUT_16(de, str, de_rec_len); CPOUT_16(de, str, de_name_len); CPOUT_16(de, str, de_type); memset(str->__pad, 0, sizeof(str->__pad)); } void gfs2_leaf_in(struct gfs2_leaf *lf, struct gfs2_buffer_head *bh) { struct gfs2_leaf *str = (struct gfs2_leaf *)bh->b_data; gfs2_meta_header_in(&lf->lf_header, bh); CPIN_16(lf, str, lf_depth); CPIN_16(lf, str, lf_entries); CPIN_32(lf, str, lf_dirent_format); CPIN_64(lf, str, lf_next); CPIN_08(lf, str, lf_reserved, 32); } void gfs2_leaf_out(struct gfs2_leaf *lf, struct gfs2_buffer_head *bh) { struct gfs2_leaf *str = (struct gfs2_leaf *)bh->b_data; gfs2_meta_header_out_bh(&lf->lf_header, bh); CPOUT_16(lf, str, lf_depth); CPOUT_16(lf, str, lf_entries); CPOUT_32(lf, str, lf_dirent_format); 
CPOUT_64(lf, str, lf_next); CPOUT_08(lf, str, lf_reserved, 32); bmodified(bh); } void gfs2_leaf_print(struct gfs2_leaf *lf) { gfs2_meta_header_print(&lf->lf_header); pv(lf, lf_depth, "%u", "0x%x"); pv(lf, lf_entries, "%u", "0x%x"); pv(lf, lf_dirent_format, "%u", "0x%x"); pv(lf, lf_next, "%llu", "0x%llx"); } void gfs2_ea_header_in(struct gfs2_ea_header *ea, char *buf) { struct gfs2_ea_header *str = (struct gfs2_ea_header *)buf; CPIN_32(ea, str, ea_rec_len); CPIN_32(ea, str, ea_data_len); ea->ea_name_len = str->ea_name_len; ea->ea_type = str->ea_type; ea->ea_flags = str->ea_flags; ea->ea_num_ptrs = str->ea_num_ptrs; } void gfs2_ea_header_print(struct gfs2_ea_header *ea, char *name) { char buf[GFS2_EA_MAX_NAME_LEN + 1]; pv(ea, ea_rec_len, "%u", "0x%x"); pv(ea, ea_data_len, "%u", "0x%x"); pv(ea, ea_name_len, "%u", "0x%x"); pv(ea, ea_type, "%u", "0x%x"); pv(ea, ea_flags, "%u", "0x%x"); pv(ea, ea_num_ptrs, "%u", "0x%x"); memset(buf, 0, GFS2_EA_MAX_NAME_LEN + 1); memcpy(buf, name, ea->ea_name_len); print_it(" name", "%s", NULL, buf); } void gfs2_log_header_in(struct gfs2_log_header *lh, struct gfs2_buffer_head *bh) { struct gfs2_log_header *str = (struct gfs2_log_header *)bh->b_data; gfs2_meta_header_in(&lh->lh_header, bh); CPIN_64(lh, str, lh_sequence); CPIN_32(lh, str, lh_flags); CPIN_32(lh, str, lh_tail); CPIN_32(lh, str, lh_blkno); CPIN_32(lh, str, lh_hash); } void gfs2_log_header_out(struct gfs2_log_header *lh, struct gfs2_buffer_head *bh) { struct gfs2_log_header *str = (struct gfs2_log_header *)bh->b_data; gfs2_meta_header_out_bh(&lh->lh_header, bh); CPOUT_64(lh, str, lh_sequence); CPOUT_32(lh, str, lh_flags); CPOUT_32(lh, str, lh_tail); CPOUT_32(lh, str, lh_blkno); CPOUT_32(lh, str, lh_hash); bmodified(bh); } void gfs2_log_header_print(struct gfs2_log_header *lh) { gfs2_meta_header_print(&lh->lh_header); pv(lh, lh_sequence, "%llu", "0x%llx"); pv(lh, lh_flags, "0x%.8X", NULL); pv(lh, lh_tail, "%u", "0x%x"); pv(lh, lh_blkno, "%u", "0x%x"); pv(lh, lh_hash, "0x%.8X", 
NULL); } void gfs2_log_descriptor_in(struct gfs2_log_descriptor *ld, struct gfs2_buffer_head *bh) { struct gfs2_log_descriptor *str = (struct gfs2_log_descriptor *)bh->b_data; gfs2_meta_header_in(&ld->ld_header, bh); CPIN_32(ld, str, ld_type); CPIN_32(ld, str, ld_length); CPIN_32(ld, str, ld_data1); CPIN_32(ld, str, ld_data2); CPIN_08(ld, str, ld_reserved, 32); } void gfs2_log_descriptor_out(struct gfs2_log_descriptor *ld, struct gfs2_buffer_head *bh) { struct gfs2_log_descriptor *str = (struct gfs2_log_descriptor *)bh->b_data; gfs2_meta_header_out_bh(&ld->ld_header, bh); CPOUT_32(ld, str, ld_type); CPOUT_32(ld, str, ld_length); CPOUT_32(ld, str, ld_data1); CPOUT_32(ld, str, ld_data2); CPOUT_08(ld, str, ld_reserved, 32); bmodified(bh); } void gfs2_log_descriptor_print(struct gfs2_log_descriptor *ld) { gfs2_meta_header_print(&ld->ld_header); pv(ld, ld_type, "%u", "0x%x"); pv(ld, ld_length, "%u", "0x%x"); pv(ld, ld_data1, "%u", "0x%x"); pv(ld, ld_data2, "%u", "0x%x"); } void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, char *buf) { struct gfs2_statfs_change *str = (struct gfs2_statfs_change *)buf; CPIN_64(sc, str, sc_total); CPIN_64(sc, str, sc_free); CPIN_64(sc, str, sc_dinodes); } void gfs2_statfs_change_out(struct gfs2_statfs_change *sc, char *buf) { struct gfs2_statfs_change *str = (struct gfs2_statfs_change *)buf; CPOUT_64(sc, str, sc_total); CPOUT_64(sc, str, sc_free); CPOUT_64(sc, str, sc_dinodes); } void gfs2_statfs_change_print(struct gfs2_statfs_change *sc) { pv(sc, sc_total, "%lld", "0x%llx"); pv(sc, sc_free, "%lld", "0x%llx"); pv(sc, sc_dinodes, "%lld", "0x%llx"); } void gfs2_quota_change_in(struct gfs2_quota_change *qc, struct gfs2_buffer_head *bh) { struct gfs2_quota_change *str = (struct gfs2_quota_change *)(bh->b_data + sizeof(struct gfs2_meta_header)); CPIN_64(qc, str, qc_change); CPIN_32(qc, str, qc_flags); CPIN_32(qc, str, qc_id); } void gfs2_quota_change_out(struct gfs2_quota_change *qc, struct gfs2_buffer_head *bh) { struct 
gfs2_quota_change *str = (struct gfs2_quota_change *)(bh->b_data + sizeof(struct gfs2_meta_header)); CPOUT_64(qc, str, qc_change); CPOUT_32(qc, str, qc_flags); CPOUT_32(qc, str, qc_id); bmodified(bh); } void gfs2_quota_change_print(struct gfs2_quota_change *qc) { pv(qc, qc_change, "%lld", "0x%llx"); pv(qc, qc_flags, "0x%.8X", NULL); pv(qc, qc_id, "%u", "0x%x"); } gfs2-utils/gfs2/libgfs2/parser.y0000664000175000017500000000753512154127655015430 0ustar andyandy%code requires { /* Required to break a circular dependency introduced with bison 2.6 */ typedef void* yyscan_t; } %code top { #include #include "lang.h" #include "lexer.h" static int yyerror(struct lgfs2_lang_state *state, yyscan_t lexer, const char *errorstr) { fprintf(stderr, "%d:%d: %s\n", state->ls_linenum, state->ls_colnum, errorstr); return 1; } } %defines %debug %define api.pure %parse-param { struct lgfs2_lang_state *state } %parse-param { yyscan_t lexer } %lex-param { yyscan_t lexer } %start script %token TOK_COLON %token TOK_COMMA %token TOK_ID %token TOK_LBRACE %token TOK_LBRACKET %token TOK_NUMBER %token TOK_OFFSET %token TOK_RBRACE %token TOK_RBRACKET %token TOK_SEMI %token TOK_SET %token TOK_GET %token TOK_STATE %token TOK_STRING %token TOK_PATH %% script: statements { state->ls_ast_root = $1; state->ls_interp_curr = $1; } | statements TOK_SEMI { state->ls_ast_root = $1; state->ls_interp_curr = $1; } ; statements: statements TOK_SEMI statement { state->ls_ast_tail->ast_left = $3; state->ls_ast_tail = $3; $$ = $1; } | statement { if (state->ls_ast_tail == NULL) state->ls_ast_tail = $1; $$ = $1; } ; statement: set_stmt { $$ = $1;} | get_stmt { $$ = $1; } ; set_stmt: TOK_SET blockspec structspec { $1->ast_right = $2; $2->ast_right = $3; $$ = $1; } | TOK_SET blockspec typespec structspec { $1->ast_right = $2; $2->ast_right = $3; $3->ast_right = $4; $$ = $1; } ; get_stmt: TOK_GET blockspec { $1->ast_right = $2; $$ = $1; } | TOK_GET blockspec TOK_STATE { $1->ast_right = $2; $2->ast_right = $3; $$ = $1; 
}
;

/* A block can be named by offset, address, path, literal or subscript. */
blockspec: offset { $$ = $1; }
	 | address { $$ = $1; }
	 | path { $$ = $1; }
	 | block_literal { $$ = $1; }
	 | subscript { $$ = $1; }
;

/* The offset token becomes the parent; the block it applies to hangs off ast_left. */
offset: blockspec TOK_OFFSET { $2->ast_left = $1; $$ = $2; }
;

/* An identifier in type position is retagged as a typespec node. */
typespec: identifier { $1->ast_type = AST_EX_TYPESPEC; $$ = $1; }
;

block_literal: identifier { $$ = $1; }
;

/* The closing bracket token is reused as the subscript node:
   subscript->ast_left = block literal, block literal->ast_left = index. */
subscript: block_literal TOK_LBRACKET index TOK_RBRACKET {
		$4->ast_left = $1;
		$1->ast_left = $3;
		$$ = $4;
	}
;

index: number { $$ = $1; }
     | identifier { $$ = $1; }
;

/* A bare number in block position is retagged as an address node. */
address: number { $1->ast_type = AST_EX_ADDRESS; $$ = $1; }
;

structspec: TOK_LBRACE fieldspecs TOK_RBRACE { $$ = $2; }
	  | TOK_LBRACE TOK_RBRACE { $$ = NULL; }
;

/*
 * Field specs form a singly linked list chained through ast_left.
 * This rule is left-recursive, so $1 is always the HEAD of the list built so
 * far. Assigning $1->ast_left directly would overwrite the link to the second
 * element each time a further field is parsed, silently dropping (and
 * leaking) every field between the first and the last. Append at the tail
 * instead.
 * NOTE(review): this relies on a freshly created node having
 * ast_left == NULL - confirm against the lexer's node allocation.
 */
fieldspecs: fieldspecs TOK_COMMA fieldspec {
		YYSTYPE tail = $1;

		while (tail->ast_left != NULL)
			tail = tail->ast_left;
		tail->ast_left = $3;
		$$ = $1;
	}
	  | fieldspec { $$ = $1; }
;

/* The colon token is reused as the assignment node:
   colon->ast_right = field name, field name->ast_right = value. */
fieldspec: identifier TOK_COLON fieldvalue {
		$2->ast_right = $1;
		$1->ast_right = $3;
		$$ = $2;
	}
;

fieldvalue: number { $$ = $1; }
	  | string { $$ = $1; }
;

number: TOK_NUMBER { $$ = $1; }

string: TOK_STRING { $$ = $1; }

identifier: TOK_ID { $$ = $1; }

path: TOK_PATH { $$ = $1; }

%%

/**
 * Allocate and initialize a new parse state structure. The caller must free the
 * memory returned by this function.
*/ struct lgfs2_lang_state *lgfs2_lang_init(void) { struct lgfs2_lang_state *state; state = calloc(1, sizeof(struct lgfs2_lang_state)); if (state == NULL) { return NULL; } state->ls_linenum = 1; return state; } void lgfs2_lang_free(struct lgfs2_lang_state **state) { ast_destroy(&(*state)->ls_ast_root); free(*state); *state = NULL; } int lgfs2_lang_parsef(struct lgfs2_lang_state *state, FILE *src) { int ret = 0; yyscan_t lexer; ret = yylex_init_extra(state, &lexer); if (ret != 0) { fprintf(stderr, "Failed to initialize lexer.\n"); return ret; } yyset_in(src, lexer); ret = yyparse(state, lexer); yylex_destroy(lexer); return ret; } int lgfs2_lang_parses(struct lgfs2_lang_state *state, const char *cstr) { int ret; FILE *src; char *str = strdup(cstr); if (str == NULL) { perror("Failed to duplicate source string"); return 1; } src = fmemopen(str, strlen(str), "r"); if (src == NULL) { perror("Failed to open string as source file"); free(str); return 1; } ret = lgfs2_lang_parsef(state, src); fclose(src); free(str); if (ret != 0 || state->ls_errnum != 0) { return 1; } return 0; } gfs2-utils/gfs2/libgfs2/recovery.c0000664000175000017500000001273512110647577015744 0ustar andyandy#include "clusterautoconfig.h" /* * NOTE: * * This code was pilfered from the gfs2 kernel and adapted to userland. * If you change this part, you should evaluate whether the upstream kernel * version of recovery.c should be changed as well. Likewise, if the * upstream version changes, this part should be kept in sync. 
* */ #include #include #include "libgfs2.h" void gfs2_replay_incr_blk(struct gfs2_inode *ip, unsigned int *blk) { uint32_t jd_blocks = ip->i_di.di_size / ip->i_sbd->sd_sb.sb_bsize; if (++*blk == jd_blocks) *blk = 0; } int gfs2_replay_read_block(struct gfs2_inode *ip, unsigned int blk, struct gfs2_buffer_head **bh) { int new = 0; uint64_t dblock; uint32_t extlen; block_map(ip, blk, &new, &dblock, &extlen, FALSE); if (!dblock) return -EIO; *bh = bread(ip->i_sbd, dblock); return 0; } /** * get_log_header - read the log header for a given segment * @ip: the journal incore inode * @blk: the block to look at * @lh: the log header to return * * Read the log header for a given segement in a given journal. Do a few * sanity checks on it. * * Returns: 0 on success, * 1 if the header was invalid or incomplete, * errno on error */ int get_log_header(struct gfs2_inode *ip, unsigned int blk, struct gfs2_log_header *head) { struct gfs2_buffer_head *bh; struct gfs2_log_header lh, *tmp; uint32_t hash, saved_hash; int error; error = gfs2_replay_read_block(ip, blk, &bh); if (error) return error; tmp = (struct gfs2_log_header *)bh->b_data; saved_hash = tmp->lh_hash; tmp->lh_hash = 0; hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header)); tmp->lh_hash = saved_hash; gfs2_log_header_in(&lh, bh); brelse(bh); if (error || lh.lh_blkno != blk || lh.lh_hash != hash) return 1; *head = lh; return 0; } /** * find_good_lh - find a good log header * @ip: the journal incore inode * @blk: the segment to start searching from * @lh: the log header to fill in * @forward: if true search forward in the log, else search backward * * Call get_log_header() to get a log header for a segment, but if the * segment is bad, either scan forward or backward until we find a good one. 
* * Returns: errno */ int find_good_lh(struct gfs2_inode *ip, unsigned int *blk, struct gfs2_log_header *head) { unsigned int orig_blk = *blk; int error; uint32_t jd_blocks = ip->i_di.di_size / ip->i_sbd->sd_sb.sb_bsize; for (;;) { error = get_log_header(ip, *blk, head); if (error <= 0) return error; if (++*blk == jd_blocks) *blk = 0; if (*blk == orig_blk) return -EIO; } } /** * jhead_scan - make sure we've found the head of the log * @jd: the journal * @head: this is filled in with the log descriptor of the head * * At this point, seg and lh should be either the head of the log or just * before. Scan forward until we find the head. * * Returns: errno */ int jhead_scan(struct gfs2_inode *ip, struct gfs2_log_header *head) { unsigned int blk = head->lh_blkno; uint32_t jd_blocks = ip->i_di.di_size / ip->i_sbd->sd_sb.sb_bsize; struct gfs2_log_header lh; int error; for (;;) { if (++blk == jd_blocks) blk = 0; error = get_log_header(ip, blk, &lh); if (error < 0) return error; if (error == 1) continue; if (lh.lh_sequence == head->lh_sequence) return -EIO; if (lh.lh_sequence < head->lh_sequence) break; *head = lh; } return 0; } /** * gfs2_find_jhead - find the head of a log * @jd: the journal * @head: the log descriptor for the head of the log is returned here * * Do a binary search of a journal and find the valid log entry with the * highest sequence number. (i.e. 
the log head) * * Returns: errno */ int gfs2_find_jhead(struct gfs2_inode *ip, struct gfs2_log_header *head) { struct gfs2_log_header lh_1, lh_m; uint32_t blk_1, blk_2, blk_m; uint32_t jd_blocks = ip->i_di.di_size / ip->i_sbd->sd_sb.sb_bsize; int error; blk_1 = 0; blk_2 = jd_blocks - 1; for (;;) { blk_m = (blk_1 + blk_2) / 2; error = find_good_lh(ip, &blk_1, &lh_1); if (error) return error; error = find_good_lh(ip, &blk_m, &lh_m); if (error) return error; if (blk_1 == blk_m || blk_m == blk_2) break; if (lh_1.lh_sequence <= lh_m.lh_sequence) blk_1 = blk_m; else blk_2 = blk_m; } error = jhead_scan(ip, &lh_1); if (error) return error; *head = lh_1; return error; } /** * clean_journal - mark a dirty journal as being clean * @sdp: the filesystem * @jd: the journal * @head: the head journal to start from * * Returns: errno */ int clean_journal(struct gfs2_inode *ip, struct gfs2_log_header *head) { unsigned int lblock; struct gfs2_log_header *lh; uint32_t hash, extlen; struct gfs2_buffer_head *bh; int new = 0; uint64_t dblock; lblock = head->lh_blkno; gfs2_replay_incr_blk(ip, &lblock); block_map(ip, lblock, &new, &dblock, &extlen, 0); if (!dblock) return -EIO; bh = bread(ip->i_sbd, dblock); memset(bh->b_data, 0, ip->i_sbd->bsize); lh = (struct gfs2_log_header *)bh->b_data; memset(lh, 0, sizeof(struct gfs2_log_header)); lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1); lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT); lh->lh_blkno = cpu_to_be32(lblock); hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header)); lh->lh_hash = cpu_to_be32(hash); bmodified(bh); brelse(bh); return 0; } gfs2-utils/gfs2/libgfs2/rgrp.c0000664000175000017500000002512612164515756015060 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include "libgfs2.h" #define RG_SYNC_TOLERANCE 
1000 /** * gfs2_compute_bitstructs - Compute the bitmap sizes * bsize: Block size * rgd: The resource group descriptor * Returns: 0 on success, -1 on error */ int gfs2_compute_bitstructs(const uint32_t bsize, struct rgrp_tree *rgd) { struct gfs2_bitmap *bits; uint32_t length = rgd->ri.ri_length; uint32_t bytes_left, bytes; int x; /* Max size of an rg is 2GB. A 2GB RG with (minimum) 512-byte blocks has 4194304 blocks. We can represent 4 blocks in one bitmap byte. Therefore, all 4194304 blocks can be represented in 1048576 bytes. Subtract a metadata header for each 512-byte block and we get 488 bytes of bitmap per block. Divide 1048576 by 488 and we can be assured we should never have more than 2149 of them. */ if (length > 2149 || length == 0) return -1; if(rgd->bits == NULL && !(rgd->bits = (struct gfs2_bitmap *) malloc(length * sizeof(struct gfs2_bitmap)))) return -1; if(!memset(rgd->bits, 0, length * sizeof(struct gfs2_bitmap))) return -1; bytes_left = rgd->ri.ri_bitbytes; for (x = 0; x < length; x++){ bits = &rgd->bits[x]; if (length == 1){ bytes = bytes_left; bits->bi_offset = sizeof(struct gfs2_rgrp); bits->bi_start = 0; bits->bi_len = bytes; } else if (x == 0){ bytes = bsize - sizeof(struct gfs2_rgrp); bits->bi_offset = sizeof(struct gfs2_rgrp); bits->bi_start = 0; bits->bi_len = bytes; } else if (x + 1 == length){ bytes = bytes_left; bits->bi_offset = sizeof(struct gfs2_meta_header); bits->bi_start = rgd->ri.ri_bitbytes - bytes_left; bits->bi_len = bytes; } else{ bytes = bsize - sizeof(struct gfs2_meta_header); bits->bi_offset = sizeof(struct gfs2_meta_header); bits->bi_start = rgd->ri.ri_bitbytes - bytes_left; bits->bi_len = bytes; } bytes_left -= bytes; } if(bytes_left) return -1; if((rgd->bits[length - 1].bi_start + rgd->bits[length - 1].bi_len) * GFS2_NBBY != rgd->ri.ri_data) return -1; if (rgd->bh) /* If we already have a bh allocated */ return 0; /* don't want to allocate another */ if(!(rgd->bh = (struct gfs2_buffer_head **) malloc(length * 
sizeof(struct gfs2_buffer_head *)))) return -1; if(!memset(rgd->bh, 0, length * sizeof(struct gfs2_buffer_head *))) return -1; return 0; } /** * blk2rgrpd - Find resource group for a given data block number * @sdp: The GFS superblock * @n: The data block number * * Returns: Ths resource group, or NULL if not found */ struct rgrp_tree *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk) { struct rgrp_tree *rgd = (struct rgrp_tree *)sdp->rgtree.osi_node; while (rgd) { if (blk < rgd->ri.ri_addr) rgd = (struct rgrp_tree *)rgd->node.osi_left; else if (blk >= rgd->ri.ri_data0 + rgd->ri.ri_data) rgd = (struct rgrp_tree *)rgd->node.osi_right; else return rgd; } return NULL; } /** * gfs2_rgrp_read - read in the resource group information from disk. * @rgd - resource group structure * returns: 0 if no error, otherwise the block number that failed */ uint64_t gfs2_rgrp_read(struct gfs2_sbd *sdp, struct rgrp_tree *rgd) { int x, length = rgd->ri.ri_length; uint64_t max_rgrp_bitbytes, max_rgrp_len; /* Max size of an rgrp is 2GB. Figure out how many blocks that is: */ max_rgrp_bitbytes = ((2147483648 / sdp->bsize) / GFS2_NBBY); max_rgrp_len = max_rgrp_bitbytes / sdp->bsize; if (!length && length > max_rgrp_len) return -1; if (gfs2_check_range(sdp, rgd->ri.ri_addr)) return -1; if (breadm(sdp, rgd->bh, length, rgd->ri.ri_addr)) return -1; for (x = 0; x < length; x++){ if(gfs2_check_meta(rgd->bh[x], (x) ? 
GFS2_METATYPE_RB : GFS2_METATYPE_RG)) { uint64_t error; error = rgd->ri.ri_addr + x; for (; x >= 0; x--) { brelse(rgd->bh[x]); rgd->bh[x] = NULL; } return error; } } if (rgd->bh && rgd->bh[0]) { if (sdp->gfs1) gfs_rgrp_in((struct gfs_rgrp *)&rgd->rg, rgd->bh[0]); else gfs2_rgrp_in(&rgd->rg, rgd->bh[0]); } return 0; } void gfs2_rgrp_relse(struct rgrp_tree *rgd) { int x, length = rgd->ri.ri_length; for (x = 0; x < length; x++) { if (rgd->bh) { if (rgd->bh[x]) brelse(rgd->bh[x]); rgd->bh[x] = NULL; } } } struct rgrp_tree *rgrp_insert(struct osi_root *rgtree, uint64_t rgblock) { struct osi_node **newn = &rgtree->osi_node, *parent = NULL; struct rgrp_tree *data; /* Figure out where to put new node */ while (*newn) { struct rgrp_tree *cur = (struct rgrp_tree *)*newn; parent = *newn; if (rgblock < cur->ri.ri_addr) newn = &((*newn)->osi_left); else if (rgblock > cur->ri.ri_addr) newn = &((*newn)->osi_right); else return cur; } data = malloc(sizeof(struct rgrp_tree)); if (!data) return NULL; if (!memset(data, 0, sizeof(struct rgrp_tree))) return NULL; /* Add new node and rebalance tree. */ data->ri.ri_addr = rgblock; osi_link_node(&data->node, parent, newn); osi_insert_color(&data->node, rgtree); return data; } void gfs2_rgrp_free(struct osi_root *rgrp_tree) { struct rgrp_tree *rgd; int rgs_since_sync = 0; struct osi_node *n; struct gfs2_sbd *sdp = NULL; while ((n = osi_first(rgrp_tree))) { rgd = (struct rgrp_tree *)n; if (rgd->bh && rgd->bh[0]) { /* if a buffer exists */ rgs_since_sync++; if (rgs_since_sync >= RG_SYNC_TOLERANCE) { if (!sdp) sdp = rgd->bh[0]->sdp; fsync(sdp->device_fd); rgs_since_sync = 0; } gfs2_rgrp_relse(rgd); /* free them all. */ } if(rgd->bits) free(rgd->bits); if(rgd->bh) { free(rgd->bh); rgd->bh = NULL; } osi_erase(&rgd->node, rgrp_tree); free(rgd); } } /** * This structure is defined in libgfs2.h as an opaque type. It stores the * constants and context required for creating resource groups from any point * in an application. 
*/ struct _lgfs2_rgrps { struct osi_root root; uint64_t nextaddr; unsigned bsize; unsigned long align; unsigned long align_off; unsigned long curr_offset; uint64_t maxrgsz; uint64_t minrgsz; uint64_t devlen; uint64_t count; uint64_t blks_total; uint32_t rgsize; }; static uint64_t align_block(const uint64_t base, const uint64_t align) { if ((align > 0) && ((base % align) > 0)) return (base - (base % align)) + align; return base; } /** * Create and initialise an empty set of resource groups * bsize: The block size of the fs * start: The block address of the first resource group * devlen: The length of the device, in fs blocks * rglen: Default rg size, in blocks * al: The required alignment of the resource groups * Returns an initialised lgfs2_rgrps_t or NULL if unsuccessful with errno set */ lgfs2_rgrps_t lgfs2_rgrps_init(unsigned bsize, uint64_t start, uint64_t devlen, uint32_t rglen, struct lgfs2_rgrp_align *al) { lgfs2_rgrps_t rgs = calloc(1, sizeof(*rgs)); if (rgs == NULL) return NULL; rgs->bsize = bsize; rgs->maxrgsz = (GFS2_MAX_RGSIZE << 20) / bsize; rgs->minrgsz = (GFS2_MIN_RGSIZE << 20) / bsize; rgs->rgsize = rglen; rgs->devlen = devlen; rgs->align = al->base; rgs->align_off = al->offset; memset(&rgs->root, 0, sizeof(rgs->root)); rgs->nextaddr = align_block(start, rgs->align); return rgs; } /** * Return the rindex structure relating to a a resource group. 
*/
struct gfs2_rindex *lgfs2_rgrp_index(lgfs2_rgrp_t rg)
{
	return &rg->ri;
}

/**
 * Return non-zero if the set is complete, i.e. no further resource groups
 * can be appended (nextaddr has been reset to 0), or zero if there is still
 * space left for more resource groups.
 */
int lgfs2_rgrps_end(lgfs2_rgrps_t rgs)
{
	return (rgs->nextaddr == 0);
}

/**
 * Returns the total resource group size, in blocks, required to give blksreq data blocks
 * blksreq: The number of data blocks wanted
 * bsize: The block size of the fs
 */
unsigned lgfs2_rgsize_for_data(uint64_t blksreq, unsigned bsize)
{
	/* Data blocks describable by the first block (after the rgrp header)... */
	const uint32_t blks_rgrp = GFS2_NBBY * (bsize - sizeof(struct gfs2_rgrp));
	/* ...and by each further bitmap block (after a plain meta header). */
	const uint32_t blks_meta = GFS2_NBBY * (bsize - sizeof(struct gfs2_meta_header));
	unsigned bitblocks = 1;

	/* Round up: a partly used bitmap block still occupies a whole block. */
	if (blksreq > blks_rgrp)
		bitblocks += ((blksreq - blks_rgrp) + blks_meta - 1) / blks_meta;
	return bitblocks + blksreq;
}

// Temporary function to aid in API migration: exposes the root node of the
// resource group tree held by rgs.
struct osi_node *lgfs2_rgrps_root(lgfs2_rgrps_t rgs)
{
	return rgs->root.osi_node;
}

/**
 * Create a new resource group after the last resource group in a set.
 * rgs: The set of resource groups
 * rglen: The required length of the resource group. If it is smaller than the
 *        default rgsize passed to lgfs2_rgrps_init() (e.g. 0), the default is used.
 * expand: Whether to expand the resource group when alignment would leave a gap.
 * Returns the new resource group on success or NULL on failure.
*/ lgfs2_rgrp_t lgfs2_rgrp_append(lgfs2_rgrps_t rgs, uint32_t rglen, int expand) { int err = 0; lgfs2_rgrp_t rg = rgrp_insert(&rgs->root, rgs->nextaddr); if (rg == NULL) return NULL; rgs->curr_offset += rgs->align_off; if (rgs->curr_offset >= rgs->align) rgs->curr_offset = 0; if (rgs->rgsize > rglen) rglen = rgs->rgsize; rgs->nextaddr = align_block(rg->ri.ri_addr + rgs->rgsize, rgs->align) + rgs->curr_offset; /* Use up gap left by alignment if possible */ if (expand && ((rgs->nextaddr - rg->ri.ri_addr) <= rgs->maxrgsz)) rglen = rgs->nextaddr - rg->ri.ri_addr; if ((rgs->nextaddr + rgs->rgsize) > rgs->devlen) { /* Squeeze the last 1 or 2 rgs into the remaining space */ if ((rgs->nextaddr < rgs->devlen) && ((rgs->devlen - rgs->nextaddr) >= rgs->minrgsz)) { rgs->rgsize = rgs->devlen - rgs->nextaddr; } else { if (rgs->devlen - rg->ri.ri_addr <= rgs->maxrgsz) rglen = rgs->devlen - rg->ri.ri_addr; else rglen = rgs->maxrgsz; /* This is the last rg */ rgs->nextaddr = 0; } } rg->ri.ri_length = rgblocks2bitblocks(rgs->bsize, rglen, &rg->ri.ri_data); rg->ri.ri_data0 = rg->ri.ri_addr + rg->ri.ri_length; rg->ri.ri_bitbytes = rg->ri.ri_data / GFS2_NBBY; rg->rg.rg_header.mh_magic = GFS2_MAGIC; rg->rg.rg_header.mh_type = GFS2_METATYPE_RG; rg->rg.rg_header.mh_format = GFS2_FORMAT_RG; rg->rg.rg_free = rg->ri.ri_data; err = gfs2_compute_bitstructs(rgs->bsize, rg); if (err != 0) return NULL; rgs->blks_total += rg->ri.ri_data; rgs->count++; return rg; } /** * Write a resource group to a file descriptor. 
* Returns 0 on success or non-zero on failure with errno set */ int lgfs2_rgrp_write(int fd, lgfs2_rgrp_t rg, unsigned bsize) { ssize_t ret = 0; size_t len = rg->ri.ri_length * bsize; unsigned int i; const struct gfs2_meta_header bmh = { .mh_magic = GFS2_MAGIC, .mh_type = GFS2_METATYPE_RB, .mh_format = GFS2_FORMAT_RB, }; char *buff = calloc(len, 1); if (buff == NULL) return -1; gfs2_rgrp_out(&rg->rg, buff); for (i = 1; i < rg->ri.ri_length; i++) gfs2_meta_header_out(&bmh, buff + (i * bsize)); ret = pwrite(fd, buff, len, rg->ri.ri_addr * bsize); if (ret != len) { free(buff); return -1; } free(buff); return 0; } gfs2-utils/gfs2/libgfs2/structures.c0000664000175000017500000002664112154127655016330 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include "libgfs2.h" int build_master(struct gfs2_sbd *sdp) { struct gfs2_inum inum; uint64_t bn; struct gfs2_buffer_head *bh; int err = lgfs2_dinode_alloc(sdp, 1, &bn); if (err != 0) return -1; inum.no_formal_ino = sdp->md.next_inum++; inum.no_addr = bn; bh = init_dinode(sdp, &inum, S_IFDIR | 0755, GFS2_DIF_SYSTEM, &inum); sdp->master_dir = lgfs2_inode_get(sdp, bh); if (sdp->master_dir == NULL) return -1; if (sdp->debug) { printf("\nMaster dir:\n"); gfs2_dinode_print(&sdp->master_dir->i_di); } sdp->master_dir->bh_owned = 1; return 0; } void build_sb(struct gfs2_sbd *sdp, const unsigned char *uuid) { unsigned int x; struct gfs2_buffer_head *bh; struct gfs2_sb sb; /* Zero out the beginning of the device up to the superblock */ for (x = 0; x < sdp->sb_addr; x++) { bh = bget(sdp, x); memset(bh->b_data, 0, sdp->bsize); bmodified(bh); brelse(bh); } memset(&sb, 0, sizeof(struct gfs2_sb)); sb.sb_header.mh_magic = GFS2_MAGIC; sb.sb_header.mh_type = GFS2_METATYPE_SB; sb.sb_header.mh_format = GFS2_FORMAT_SB; sb.sb_fs_format = GFS2_FORMAT_FS; sb.sb_multihost_format = GFS2_FORMAT_MULTI; sb.sb_bsize = sdp->bsize; sb.sb_bsize_shift = 
ffs(sdp->bsize) - 1; sb.sb_master_dir = sdp->master_dir->i_di.di_num; sb.sb_root_dir = sdp->md.rooti->i_di.di_num; strcpy(sb.sb_lockproto, sdp->lockproto); strcpy(sb.sb_locktable, sdp->locktable); #ifdef GFS2_HAS_UUID memcpy(sb.sb_uuid, uuid, sizeof(sb.sb_uuid)); #endif bh = bget(sdp, sdp->sb_addr); gfs2_sb_out(&sb, bh); brelse(bh); if (sdp->debug) { printf("\nSuper Block:\n"); gfs2_sb_print(&sb); } } int write_journal(struct gfs2_sbd *sdp, unsigned int j, unsigned int blocks) { struct gfs2_log_header lh; unsigned int x; uint64_t seq = ((blocks) * (random() / (RAND_MAX + 1.0))); uint32_t hash; unsigned int height; /* Build the height up so our journal blocks will be contiguous and */ /* not broken up by indirect block pages. */ height = calc_tree_height(sdp->md.journal[j], (blocks + 1) * sdp->bsize); build_height(sdp->md.journal[j], height); memset(&lh, 0, sizeof(struct gfs2_log_header)); lh.lh_header.mh_magic = GFS2_MAGIC; lh.lh_header.mh_type = GFS2_METATYPE_LH; lh.lh_header.mh_format = GFS2_FORMAT_LH; lh.lh_flags = GFS2_LOG_HEAD_UNMOUNT; for (x = 0; x < blocks; x++) { struct gfs2_buffer_head *bh = get_file_buf(sdp->md.journal[j], x, TRUE); if (!bh) return -1; bmodified(bh); brelse(bh); } for (x = 0; x < blocks; x++) { struct gfs2_buffer_head *bh = get_file_buf(sdp->md.journal[j], x, FALSE); if (!bh) return -1; memset(bh->b_data, 0, sdp->bsize); lh.lh_sequence = seq; lh.lh_blkno = x; gfs2_log_header_out(&lh, bh); hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header)); ((struct gfs2_log_header *)bh->b_data)->lh_hash = cpu_to_be32(hash); bmodified(bh); brelse(bh); if (++seq == blocks) seq = 0; } if (sdp->debug) { printf("\nJournal %u:\n", j); gfs2_dinode_print(&sdp->md.journal[j]->i_di); } return 0; } int build_journal(struct gfs2_sbd *sdp, int j, struct gfs2_inode *jindex) { char name[256]; int ret; sprintf(name, "journal%u", j); sdp->md.journal[j] = createi(jindex, name, S_IFREG | 0600, GFS2_DIF_SYSTEM); if (sdp->md.journal[j] == NULL) { return errno; 
} ret = write_journal(sdp, j, sdp->jsize << 20 >> sdp->sd_sb.sb_bsize_shift); return ret; } int build_jindex(struct gfs2_sbd *sdp) { struct gfs2_inode *jindex; unsigned int j; int ret; jindex = createi(sdp->master_dir, "jindex", S_IFDIR | 0700, GFS2_DIF_SYSTEM); if (jindex == NULL) { return errno; } sdp->md.journal = malloc(sdp->md.journals * sizeof(struct gfs2_inode *)); for (j = 0; j < sdp->md.journals; j++) { ret = build_journal(sdp, j, jindex); if (ret) return ret; inode_put(&sdp->md.journal[j]); } if (sdp->debug) { printf("\nJindex:\n"); gfs2_dinode_print(&jindex->i_di); } free(sdp->md.journal); inode_put(&jindex); return 0; } static int build_inum_range(struct gfs2_inode *per_node, unsigned int j) { struct gfs2_sbd *sdp = per_node->i_sbd; char name[256]; struct gfs2_inode *ip; sprintf(name, "inum_range%u", j); ip = createi(per_node, name, S_IFREG | 0600, GFS2_DIF_SYSTEM | GFS2_DIF_JDATA); if (ip == NULL) { return errno; } ip->i_di.di_size = sizeof(struct gfs2_inum_range); gfs2_dinode_out(&ip->i_di, ip->i_bh); if (sdp->debug) { printf("\nInum Range %u:\n", j); gfs2_dinode_print(&ip->i_di); } inode_put(&ip); return 0; } static int build_statfs_change(struct gfs2_inode *per_node, unsigned int j) { struct gfs2_sbd *sdp = per_node->i_sbd; char name[256]; struct gfs2_inode *ip; sprintf(name, "statfs_change%u", j); ip = createi(per_node, name, S_IFREG | 0600, GFS2_DIF_SYSTEM | GFS2_DIF_JDATA); if (ip == NULL) { return errno; } ip->i_di.di_size = sizeof(struct gfs2_statfs_change); gfs2_dinode_out(&ip->i_di, ip->i_bh); if (sdp->debug) { printf("\nStatFS Change %u:\n", j); gfs2_dinode_print(&ip->i_di); } inode_put(&ip); return 0; } static int build_quota_change(struct gfs2_inode *per_node, unsigned int j) { struct gfs2_sbd *sdp = per_node->i_sbd; struct gfs2_meta_header mh; char name[256]; struct gfs2_inode *ip; unsigned int blocks = sdp->qcsize << (20 - sdp->sd_sb.sb_bsize_shift); unsigned int x; unsigned int hgt; struct gfs2_buffer_head *bh; memset(&mh, 0, 
sizeof(struct gfs2_meta_header)); mh.mh_magic = GFS2_MAGIC; mh.mh_type = GFS2_METATYPE_QC; mh.mh_format = GFS2_FORMAT_QC; sprintf(name, "quota_change%u", j); ip = createi(per_node, name, S_IFREG | 0600, GFS2_DIF_SYSTEM); if (ip == NULL) { return errno; } hgt = calc_tree_height(ip, (blocks + 1) * sdp->bsize); build_height(ip, hgt); for (x = 0; x < blocks; x++) { bh = get_file_buf(ip, x, FALSE); if (!bh) return -1; memset(bh->b_data, 0, sdp->bsize); gfs2_meta_header_out_bh(&mh, bh); brelse(bh); } if (sdp->debug) { printf("\nQuota Change %u:\n", j); gfs2_dinode_print(&ip->i_di); } inode_put(&ip); return 0; } int build_per_node(struct gfs2_sbd *sdp) { struct gfs2_inode *per_node; unsigned int j; int err; per_node = createi(sdp->master_dir, "per_node", S_IFDIR | 0700, GFS2_DIF_SYSTEM); if (per_node == NULL) { return errno; } for (j = 0; j < sdp->md.journals; j++) { err = build_inum_range(per_node, j); if (err) { return err; } err = build_statfs_change(per_node, j); if (err) { return err; } err = build_quota_change(per_node, j); if (err) { return err; } } if (sdp->debug) { printf("\nper_node:\n"); gfs2_dinode_print(&per_node->i_di); } inode_put(&per_node); return 0; } int build_inum(struct gfs2_sbd *sdp) { struct gfs2_inode *ip; ip = createi(sdp->master_dir, "inum", S_IFREG | 0600, GFS2_DIF_SYSTEM | GFS2_DIF_JDATA); if (ip == NULL) { return errno; } if (sdp->debug) { printf("\nInum Inode:\n"); gfs2_dinode_print(&ip->i_di); } inode_put(&ip); return 0; } int build_statfs(struct gfs2_sbd *sdp) { struct gfs2_inode *ip; ip = createi(sdp->master_dir, "statfs", S_IFREG | 0600, GFS2_DIF_SYSTEM | GFS2_DIF_JDATA); if (ip == NULL) { return errno; } if (sdp->debug) { printf("\nStatFS Inode:\n"); gfs2_dinode_print(&ip->i_di); } inode_put(&ip); return 0; } int build_rindex(struct gfs2_sbd *sdp) { struct gfs2_inode *ip; struct osi_node *n, *next = NULL; struct rgrp_tree *rl; char buf[sizeof(struct gfs2_rindex)]; int count; ip = createi(sdp->master_dir, "rindex", S_IFREG | 0600, 
GFS2_DIF_SYSTEM | GFS2_DIF_JDATA); if (ip == NULL) { return errno; } ip->i_di.di_payload_format = GFS2_FORMAT_RI; bmodified(ip->i_bh); for (n = osi_first(&sdp->rgtree); n; n = next) { next = osi_next(n); rl = (struct rgrp_tree *)n; gfs2_rindex_out(&rl->ri, buf); count = gfs2_writei(ip, buf, ip->i_di.di_size, sizeof(struct gfs2_rindex)); if (count != sizeof(struct gfs2_rindex)) return -1; } memset(buf, 0, sizeof(struct gfs2_rindex)); count = __gfs2_writei(ip, buf, ip->i_di.di_size, sizeof(struct gfs2_rindex), 0); if (count != sizeof(struct gfs2_rindex)) return -1; if (sdp->debug) { printf("\nResource Index:\n"); gfs2_dinode_print(&ip->i_di); } inode_put(&ip); return 0; } int build_quota(struct gfs2_sbd *sdp) { struct gfs2_inode *ip; struct gfs2_quota qu; char buf[sizeof(struct gfs2_quota)]; int count; ip = createi(sdp->master_dir, "quota", S_IFREG | 0600, GFS2_DIF_SYSTEM | GFS2_DIF_JDATA); if (ip == NULL) { return errno; } ip->i_di.di_payload_format = GFS2_FORMAT_QU; bmodified(ip->i_bh); memset(&qu, 0, sizeof(struct gfs2_quota)); qu.qu_value = 1; gfs2_quota_out(&qu, buf); count = gfs2_writei(ip, buf, ip->i_di.di_size, sizeof(struct gfs2_quota)); if (count != sizeof(struct gfs2_quota)) return -1; count = gfs2_writei(ip, buf, ip->i_di.di_size, sizeof(struct gfs2_quota)); if (count != sizeof(struct gfs2_quota)) return -1; if (sdp->debug) { printf("\nRoot quota:\n"); gfs2_quota_print(&qu); } inode_put(&ip); return 0; } int build_root(struct gfs2_sbd *sdp) { struct gfs2_inum inum; uint64_t bn; struct gfs2_buffer_head *bh; int err = lgfs2_dinode_alloc(sdp, 1, &bn); if (err != 0) return -1; inum.no_formal_ino = sdp->md.next_inum++; inum.no_addr = bn; bh = init_dinode(sdp, &inum, S_IFDIR | 0755, 0, &inum); sdp->md.rooti = lgfs2_inode_get(sdp, bh); if (sdp->md.rooti == NULL) return -1; if (sdp->debug) { printf("\nRoot directory:\n"); gfs2_dinode_print(&sdp->md.rooti->i_di); } sdp->md.rooti->bh_owned = 1; return 0; } int do_init_inum(struct gfs2_sbd *sdp) { struct gfs2_inode 
*ip = sdp->md.inum; uint64_t buf; int count; buf = cpu_to_be64(sdp->md.next_inum); count = gfs2_writei(ip, &buf, 0, sizeof(uint64_t)); if (count != sizeof(uint64_t)) return -1; if (sdp->debug) printf("\nNext Inum: %"PRIu64"\n", sdp->md.next_inum); return 0; } int do_init_statfs(struct gfs2_sbd *sdp) { struct gfs2_inode *ip = sdp->md.statfs; struct gfs2_statfs_change sc; char buf[sizeof(struct gfs2_statfs_change)]; int count; sc.sc_total = sdp->blks_total; sc.sc_free = sdp->blks_total - sdp->blks_alloced; sc.sc_dinodes = sdp->dinodes_alloced; gfs2_statfs_change_out(&sc, buf); count = gfs2_writei(ip, buf, 0, sizeof(struct gfs2_statfs_change)); if (count != sizeof(struct gfs2_statfs_change)) return -1; if (sdp->debug) { printf("\nStatfs:\n"); gfs2_statfs_change_print(&sc); } return 0; } int gfs2_check_meta(struct gfs2_buffer_head *bh, int type) { uint32_t check_magic = ((struct gfs2_meta_header *)(bh->b_data))->mh_magic; uint32_t check_type = ((struct gfs2_meta_header *)(bh->b_data))->mh_type; check_magic = be32_to_cpu(check_magic); check_type = be32_to_cpu(check_type); if((check_magic != GFS2_MAGIC) || (type && (check_type != type))) return -1; return 0; } unsigned lgfs2_bm_scan(struct rgrp_tree *rgd, unsigned idx, uint64_t *buf, uint8_t state) { struct gfs2_bitmap *bi = &rgd->bits[idx]; unsigned n = 0; uint32_t blk = 0; while(blk < (bi->bi_len * GFS2_NBBY)) { blk = gfs2_bitfit((const unsigned char *)rgd->bh[idx]->b_data + bi->bi_offset, bi->bi_len, blk, state); if (blk == BFITNOENT) break; buf[n++] = blk + (bi->bi_start * GFS2_NBBY) + rgd->ri.ri_data0; blk++; } return n; } gfs2-utils/gfs2/libgfs2/super.c0000664000175000017500000002044712144433405015231 0ustar andyandy#include "clusterautoconfig.h" #include #include #include #include #include #include #include #include #include "libgfs2.h" #include "osi_list.h" /** * check_sb - Check superblock * @sb: The superblock * * Checks the version code of the FS is one that we understand how to * read and that the sizes of 
the various on-disk structures have not * changed. * * Returns: -1 on failure, 1 if this is gfs (gfs1), 2 if this is gfs2 */ int check_sb(struct gfs2_sb *sb) { if (sb->sb_header.mh_magic != GFS2_MAGIC || sb->sb_header.mh_type != GFS2_METATYPE_SB) { errno = EIO; return -1; } if (sb->sb_fs_format == GFS_FORMAT_FS && sb->sb_header.mh_format == GFS_FORMAT_SB && sb->sb_multihost_format == GFS_FORMAT_MULTI) { return 1; } return 2; } /* * read_sb: read the super block from disk * sdp: in-core super block * * This function reads in the super block from disk and * initializes various constants maintained in the super * block * * Returns: 0 on success, -1 on failure * sdp->gfs1 will be set if this is gfs (gfs1) */ int read_sb(struct gfs2_sbd *sdp) { struct gfs2_buffer_head *bh; uint64_t space = 0; unsigned int x; int ret; bh = bread(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); gfs2_sb_in(&sdp->sd_sb, bh); brelse(bh); ret = check_sb(&sdp->sd_sb); if (ret < 0) return ret; if (ret == 1) sdp->gfs1 = 1; sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT; sdp->bsize = sdp->sd_sb.sb_bsize; if (sdp->gfs1) { sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode)) / sizeof(uint64_t); sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - sizeof(struct gfs_indirect)) / sizeof(uint64_t); } else { sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) / sizeof(uint64_t); sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(uint64_t); } sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); sdp->sd_hash_bsize = sdp->bsize / 2; sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1; sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(uint64_t); sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode); sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs; for (x = 2; ; x++){ space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs; /* FIXME: Do we really need this first check?? 
*/ if (space / sdp->sd_inptrs != sdp->sd_heightsize[x - 1] || space % sdp->sd_inptrs != 0) break; sdp->sd_heightsize[x] = space; } if (x > GFS2_MAX_META_HEIGHT){ errno = E2BIG; return -1; } sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode); sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs; for (x = 2; ; x++){ space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs; if (space / sdp->sd_inptrs != sdp->sd_jheightsize[x - 1] || space % sdp->sd_inptrs != 0) break; sdp->sd_jheightsize[x] = space; } sdp->sd_max_jheight = x; if(sdp->sd_max_jheight > GFS2_MAX_META_HEIGHT) { errno = E2BIG; return -1; } sdp->fssize = lseek(sdp->device_fd, 0, SEEK_END) / sdp->sd_sb.sb_bsize; sdp->sb_addr = GFS2_SB_ADDR * GFS2_BASIC_BLOCK / sdp->bsize; sdp->sd_blocks_per_bitmap = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) * GFS2_NBBY; return 0; } /** * rindex_read - read in the rg index file * @sdp: the incore superblock pointer * fd: optional file handle for rindex file (if meta_fs file system is mounted) * (if fd is <= zero, it will read from raw device) * @count1: return count of the rgs. 
* @sane: return whether rindex is consistent * * Returns: 0 on success, -1 on failure */ int rindex_read(struct gfs2_sbd *sdp, int fd, int *count1, int *sane) { unsigned int rg; int error; union { struct gfs2_rindex bufgfs2; } buf; struct gfs2_rindex ri; struct rgrp_tree *rgd = NULL, *prev_rgd = NULL; uint64_t prev_length = 0; *sane = 1; *count1 = 0; if (!fd && sdp->md.riinode->i_di.di_size % sizeof(struct gfs2_rindex)) *sane = 0; /* rindex file size must be a multiple of 96 */ for (rg = 0; ; rg++) { if (fd > 0) error = read(fd, &buf, sizeof(struct gfs2_rindex)); else error = gfs2_readi(sdp->md.riinode, (char *)&buf.bufgfs2, rg * sizeof(struct gfs2_rindex), sizeof(struct gfs2_rindex)); if (!error) break; if (error != sizeof(struct gfs2_rindex)) return -1; gfs2_rindex_in(&ri, (char *)&buf.bufgfs2); rgd = rgrp_insert(&sdp->rgtree, ri.ri_addr); memcpy(&rgd->ri, &ri, sizeof(struct gfs2_rindex)); rgd->start = rgd->ri.ri_addr; if (prev_rgd) { /* If rg addresses go backwards, it's not sane (or it's converted from gfs1). */ if (!sdp->gfs1) { if (prev_rgd->start >= rgd->start) *sane = 0; /* If rg lengths are not consistent, it's not sane (or it's converted from gfs1). The first RG will be a different length due to space allocated for the superblock, so we can't detect this until we check rgrp 3, when we can compare the distance between rgrp 1 and rgrp 2. 
*/ if (rg > 2 && prev_length && prev_length != rgd->start - prev_rgd->start) *sane = 0; } prev_length = rgd->start - prev_rgd->start; prev_rgd->length = rgrp_size(prev_rgd); } if(gfs2_compute_bitstructs(sdp->sd_sb.sb_bsize, rgd)) *sane = 0; (*count1)++; prev_rgd = rgd; } if (prev_rgd) prev_rgd->length = rgrp_size(prev_rgd); return 0; } #define RA_WINDOW 32 static unsigned gfs2_rgrp_reada(struct gfs2_sbd *sdp, unsigned cur_window, struct osi_node *n) { struct rgrp_tree *rgd; unsigned i; off_t start, len; for (i = 0; i < RA_WINDOW; i++, n = osi_next(n)) { if (n == NULL) return i; if (i < cur_window) continue; rgd = (struct rgrp_tree *)n; start = rgd->ri.ri_addr * sdp->bsize; len = rgd->ri.ri_length * sdp->bsize; posix_fadvise(sdp->device_fd, start, len, POSIX_FADV_WILLNEED); } return i; } /** * ri_update - attach rgrps to the super block * @sdp: incore superblock data * fd: optional file handle for rindex (through the meta_fs) * @rgcount: returned count of rgs * * Given the rgrp index inode, link in all rgrps into the super block * and be sure that they can be read. * * Returns: 0 on success, -1 on failure. 
*/ static int __ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int *sane, int quiet) { struct rgrp_tree *rgd; struct gfs2_rindex *ri; int count1 = 0, count2 = 0; uint64_t errblock = 0; uint64_t rmax = 0; struct osi_node *n, *next = NULL; unsigned ra_window = 0; /* Turn off generic readhead */ posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_RANDOM); if (rindex_read(sdp, fd, &count1, sane)) goto fail; for (n = osi_first(&sdp->rgtree); n; n = next) { next = osi_next(n); rgd = (struct rgrp_tree *)n; /* Readahead resource group headers */ if (ra_window < RA_WINDOW/2) ra_window = gfs2_rgrp_reada(sdp, ra_window, n); /* Read resource group header */ errblock = gfs2_rgrp_read(sdp, rgd); if (errblock) return errblock; ra_window--; count2++; if (!quiet && count2 % 100 == 0) { printf("."); fflush(stdout); } ri = &rgd->ri; if (ri->ri_data0 + ri->ri_data - 1 > rmax) rmax = ri->ri_data0 + ri->ri_data - 1; } sdp->fssize = rmax; *rgcount = count1; if (count1 != count2) goto fail; posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL); return 0; fail: posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL); gfs2_rgrp_free(&sdp->rgtree); return -1; } int ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int *sane) { return __ri_update(sdp, fd, rgcount, sane, 1); } /** * gfs1_ri_update - attach rgrps to the super block * Stolen from libgfs2/super.c, but modified to handle gfs1. * @sdp: * * Given the rgrp index inode, link in all rgrps into the super block * and be sure that they can be read. * * Returns: 0 on success, -1 on failure. 
*/ int gfs1_ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int quiet) { int sane; return __ri_update(sdp, fd, rgcount, &sane, quiet); } int write_sb(struct gfs2_sbd *sbp) { struct gfs2_buffer_head *bh; bh = bread(sbp, GFS2_SB_ADDR >> sbp->sd_fsb2bb_shift); gfs2_sb_out(&sbp->sd_sb, bh); brelse(bh); fsync(sbp->device_fd); /* make sure the change gets to disk ASAP */ return 0; } gfs2-utils/gfs2/libgfs2/target.mk0000664000175000017500000000005112110647577015545 0ustar andyandy $(eval $(call make-library,libgfs2.a)) gfs2-utils/gfs2/man/Makefile.am0000664000175000017500000000036112110647577015211 0ustar andyandyMAINTAINERCLEANFILES = Makefile.in dist_man_MANS = fsck.gfs2.8 \ gfs2.5 \ gfs2_convert.8 \ gfs2_edit.8 \ gfs2_grow.8 \ gfs2_jadd.8 \ mkfs.gfs2.8 \ tunegfs2.8 \ gfs2_lockcapture.8 \ gfs2_trace.8 gfs2-utils/gfs2/man/fsck.gfs2.80000664000175000017500000000577112110647577015046 0ustar andyandy.TH fsck.gfs2 8 .SH NAME fsck.gfs2 - Offline GFS and GFS2 file system checker .SH SYNOPSIS .B fsck.gfs2 [\fIOPTION\fR]... \fIDEVICE\fR .SH WARNING All computers \fImust\fP have the filesystem unmounted before running fsck.gfs2. Failure to unmount from all nodes in a cluster will likely result in filesystem corruption. .SH DESCRIPTION fsck.gfs2 will check that the GFS or GFS2 file system on a device is structurally valid. It should not be run on a mounted file system. If file system corruption is detected, it will attempt to repair the file system. There is a limit to what fsck.gfs2 can do. If important file system structures are destroyed, such that the checker cannot determine what the repairs should be, reparations could fail. GFS2 is a journaled file system, and as such should be able to repair damage to the file system on its own. However, faulty hardware has the ability to write incomplete blocks to a file system thereby causing corruption that GFS2 cannot fix. The first step to ensuring a healthy file system is the selection of reliable hardware (i.e. 
storage systems that will write complete blocks - even in the event of power failure). Note: Most file system checkers will not check the file system if it is "clean" (i.e. unmounted since the last use). The fsck.gfs2 program behaves differently because the storage may be shared among several nodes in a cluster, and therefore problems may have been introduced on a different computer. Therefore, fsck.gfs2 will always check the file system unless the -p (preen) option is used, in which case it follows special rules (see below). .SH OPTIONS .TP \fB-a\fP Same as the -p (preen) option. .TP \fB-f\fP Force checking even if the file system seems clean. .TP \fB-h\fP Help. This prints out the proper command line usage syntax. .TP \fB-q\fP Quiet. .TP \fB-n\fP No to all questions. By specifying this option, fsck.gfs2 will only show the changes that would be made, but not make any changes to the filesystem. .TP \fB-p\fP Preen (same as -a: automatically repair the file system if it is dirty, and safe to do so, otherwise exit.) Note: If the file system has locking protocol lock_nolock, the file system is considered a non-shared storage device and the fsck is deemed safe. However, fsck.gfs2 does not know whether it was called automatically from the init process, due to options in the /etc/fstab file. Therefore, if the locking protocol is lock_dlm and -a or -p was specified, fsck.gfs2 cannot determine whether the disk is mounted by other nodes in the cluster. Therefore, the fsck is deemed to be unsafe and a warning is given if any damage or dirty journals are found. In that case, the file system should be unmounted from all nodes in the cluster and fsck.gfs2 should be run manually without the -a or -p options. .TP \fB-V\fP Version. Print out the program version information. .TP \fB-v\fP Verbose operation. Print more information while running. .TP \fB-y\fP Yes to all questions. By specifying this option, fsck.gfs2 will not prompt before making changes. 
gfs2-utils/gfs2/man/gfs2.50000664000175000017500000002767212154127656014120 0ustar andyandy.TH gfs2 5 .SH NAME gfs2 \- GFS2 reference guide .SH SYNOPSIS Overview of the GFS2 filesystem .SH DESCRIPTION GFS2 is a clustered filesystem, designed for sharing data between multiple nodes connected to a common shared storage device. It can also be used as a local filesystem on a single node, however since the design is aimed at clusters, that will usually result in lower performance than using a filesystem designed specifically for single node use. GFS2 is a journaling filesystem and one journal is required for each node that will mount the filesystem. The one exception to that is spectator mounts which are equivalent to mounting a read-only block device and as such can neither recover a journal nor write to the filesystem, so do not require a journal assigned to them. .SH MOUNT OPTIONS .TP \fBlockproto=\fP\fILockProtoName\fR This specifies which inter-node lock protocol is used by the GFS2 filesystem for this mount, overriding the default lock protocol name stored in the filesystem's on-disk superblock. The \fILockProtoName\fR must be one of the supported locking protocols, currently these are \fIlock_nolock\fR and \fIlock_dlm\fR. The default lock protocol name is written to disk initially when creating the filesystem with \fBmkfs.gfs2\fP(8), -p option. It can be changed on-disk by using the \fBgfs2_tool\fP(8) utility's \fBsb proto\fP command. The \fBlockproto\fP mount option should be used only under special circumstances in which you want to temporarily use a different lock protocol without changing the on-disk default. Using the incorrect lock protocol on a cluster filesystem mounted from more than one node will almost certainly result in filesystem corruption. 
.TP \fBlocktable=\fP\fILockTableName\fR This specifies the identity of the cluster and of the filesystem for this mount, overriding the default cluster/filesystem identity stored in the filesystem's on-disk superblock. The cluster/filesystem name is recognized globally throughout the cluster, and establishes a unique namespace for the inter-node locking system, enabling the mounting of multiple GFS2 filesystems. The format of \fILockTableName\fR is lock-module-specific. For \fIlock_dlm\fR, the format is \fIclustername:fsname\fR. For \fIlock_nolock\fR, the field is ignored. The default cluster/filesystem name is written to disk initially when creating the filesystem with \fBmkfs.gfs2\fP(8), -t option. It can be changed on-disk by using the \fBgfs2_tool\fP(8) utility's \fBsb table\fP command. The \fBlocktable\fP mount option should be used only under special circumstances in which you want to mount the filesystem in a different cluster, or mount it as a different filesystem name, without changing the on-disk default. .TP \fBlocalflocks\fP This flag tells GFS2 that it is running as a local (not clustered) filesystem, so it can allow the kernel VFS layer to do all flock and fcntl file locking. When running in cluster mode, these file locks require inter-node locks, and require the support of GFS2. When running locally, better performance is achieved by letting VFS handle the whole job. This is turned on automatically by the lock_nolock module. .TP \fBerrors=\fP\fI[panic|withdraw]\fR Setting errors=panic causes GFS2 to oops when encountering an error that would otherwise cause the mount to withdraw or print an assertion warning. The default setting is errors=withdraw. This option should not be used in a production system. It replaces the earlier \fBdebug\fP option on kernel versions 2.6.31 and above. .TP \fBacl\fP Enables POSIX Access Control List \fBacl\fP(5) support within GFS2. .TP \fBspectator\fP Mount this filesystem using a special form of read-only mount. 
The mount does not use one of the filesystem's journals. The node is unable to recover journals for other nodes. .TP \fBnorecovery\fP A synonym for spectator .TP \fBsuiddir\fP Sets owner of any newly created file or directory to be that of parent directory, if parent directory has S_ISUID permission attribute bit set. Sets S_ISUID in any new directory, if its parent directory's S_ISUID is set. Strips all execution bits on a new file, if parent directory owner is different from owner of process creating the file. Set this option only if you know why you are setting it. .TP \fBquota=\fP\fI[off/account/on]\fR Turns quotas on or off for a filesystem. Setting the quotas to be in the "account" state causes the per UID/GID usage statistics to be correctly maintained by the filesystem, limit and warn values are ignored. The default value is "off". .TP \fBdiscard\fP Causes GFS2 to generate "discard" I/O requests for blocks which have been freed. These can be used by suitable hardware to implement thin-provisioning and similar schemes. This feature is supported in kernel version 2.6.30 and above. .TP \fBbarrier\fP This option, which defaults to on, causes GFS2 to send I/O barriers when flushing the journal. The option is automatically turned off if the underlying device does not support I/O barriers. We highly recommend the use of I/O barriers with GFS2 at all times unless the block device is designed so that it cannot lose its write cache content (e.g. it's on a UPS, or it doesn't have a write cache) .TP \fBcommit=\fP\fIsecs\fR This is similar to the ext3 \fBcommit=\fP option in that it sets the maximum number of seconds between journal commits if there is dirty data in the journal. The default is 60 seconds. This option is only provided in kernel versions 2.6.31 and above. .TP \fBdata=\fP\fI[ordered|writeback]\fR When data=ordered is set, the user data modified by a transaction is flushed to the disk before the transaction is committed to disk. 
This should prevent the user from seeing uninitialized blocks in a file after a crash. Data=writeback mode writes the user data to the disk at any time after it's dirtied. This doesn't provide the same consistency guarantee as ordered mode, but it should be slightly faster for some workloads. The default is ordered mode. .TP \fBmeta\fP This option results in selecting the meta filesystem root rather than the normal filesystem root. This option is normally only used by the GFS2 utility functions. Altering any file on the GFS2 meta filesystem may render the filesystem unusable, so only experts in the GFS2 on-disk layout should use this option. .TP \fBquota_quantum=\fP\fIsecs\fR This sets the number of seconds for which a change in the quota information may sit on one node before being written to the quota file. This is the preferred way to set this parameter. The value is an integer number of seconds greater than zero. The default is 60 seconds. Shorter settings result in faster updates of the lazy quota information and less likelihood of someone exceeding their quota. Longer settings make filesystem operations involving quotas faster and more efficient. .TP \fBstatfs_quantum=\fP\fIsecs\fR Setting statfs_quantum to 0 is the preferred way to set the slow version of statfs. The default value is 30 secs which sets the maximum time period before statfs changes will be synced to the master statfs file. This can be adjusted to allow for faster, less accurate statfs values or slower more accurate values. When set to 0, statfs will always report the true values. .TP \fBstatfs_percent=\fP\fIvalue\fR This setting provides a bound on the maximum percentage change in the statfs information on a local basis before it is synced back to the master statfs file, even if the time period has not expired. If the setting of statfs_quantum is 0, then this setting is ignored. .SH BUGS GFS2 doesn't support \fBerrors=\fP\fIremount-ro\fR or \fBdata=\fP\fIjournal\fR. 
It is not possible to switch support for user and group quotas on and off independently of each other. Some of the error messages are rather cryptic, if you encounter one of these messages check firstly that gfs_controld is running and secondly that you have enough journals on the filesystem for the number of nodes in use. .SH SEE ALSO \fBmount\fP(8) for general mount options, \fBchmod\fP(1) and \fBchmod\fP(2) for access permission flags, \fBacl\fP(5) for access control lists, \fBlvm\fP(8) for volume management, \fBccs\fP(7) for cluster management, \fBumount\fP(8), \fBinitrd\fP(4). The GFS2 documentation has been split into a number of sections: \fBgfs2_edit\fP(8) A GFS2 debug tool (use with caution) \fBfsck.gfs2\fP(8) The GFS2 file system checker \fBgfs2_grow\fP(8) Growing a GFS2 file system \fBgfs2_jadd\fP(8) Adding a journal to a GFS2 file system \fBmkfs.gfs2\fP(8) Make a GFS2 file system \fBgfs2_quota\fP(8) Manipulate GFS2 disk quotas \fBgfs2_tool\fP(8) Tool to manipulate a GFS2 file system (obsolete) \fBtunegfs2\fP(8) Tool to manipulate GFS2 superblocks .SH SETUP GFS2 clustering is driven by the dlm, which depends on dlm_controld to provide clustering from userspace. dlm_controld clustering is built on corosync cluster/group membership and messaging. Follow these steps to manually configure and run gfs2/dlm/corosync. .B 1. create /etc/corosync/corosync.conf and copy to all nodes In this sample, replace cluster_name and IP addresses, and add nodes as needed. If using only two nodes, uncomment the two_node line. See corosync.conf(5) for more information. .nf totem { version: 2 secauth: off cluster_name: abc } nodelist { node { ring0_addr: 10.10.10.1 nodeid: 1 } node { ring0_addr: 10.10.10.2 nodeid: 2 } node { ring0_addr: 10.10.10.3 nodeid: 3 } } quorum { provider: corosync_votequorum # two_node: 1 } logging { to_syslog: yes } .fi .PP .B 2. start corosync on all nodes .nf systemctl start corosync .fi Run corosync-quorumtool to verify that all nodes are listed. 
.PP .B 3. create /etc/dlm/dlm.conf and copy to all nodes .B * To use no fencing, use this line: .nf enable_fencing=0 .fi .B * To use no fencing, but exercise fencing functions, use this line: .nf fence_all /bin/true .fi The "true" binary will be executed for all nodes and will succeed (exit 0) immediately. .B * To use manual fencing, use this line: .nf fence_all /bin/false .fi The "false" binary will be executed for all nodes and will fail (exit 1) immediately. When a node fails, manually run: dlm_tool fence_ack .B * To use stonith/pacemaker for fencing, use this line: .nf fence_all /usr/sbin/dlm_stonith .fi The "dlm_stonith" binary will be executed for all nodes. If stonith/pacemaker systems are not available, dlm_stonith will fail and this config becomes the equivalent of the previous /bin/false config. .B * To use an APC power switch, use these lines: .nf device apc /usr/sbin/fence_apc ipaddr=1.1.1.1 login=admin password=pw connect apc node=1 port=1 connect apc node=2 port=2 connect apc node=3 port=3 .fi Other network switch based agents are configured similarly. .B * To use sanlock/watchdog fencing, use these lines: .nf device wd /usr/sbin/fence_sanlock path=/dev/fence/leases connect wd node=1 host_id=1 connect wd node=2 host_id=2 unfence wd .fi See fence_sanlock(8) for more information. .B * For other fencing configurations see dlm.conf(5) man page. .PP .B 4. start dlm_controld on all nodes .nf systemctl start dlm .fi Run "dlm_tool status" to verify that all nodes are listed. .PP .B 5. if using clvm, start clvmd on all nodes systemctl start clvmd .PP .B 6. make new gfs2 file systems mkfs.gfs2 -p lock_dlm -t cluster_name:fs_name -j num /path/to/storage The cluster_name must match the name used in step 1 above. The fs_name must be a unique name in the cluster. The -j option is the number of journals to create, there must be one for each node that will mount the fs. .PP .B 7. 
mount gfs2 file systems mount /path/to/storage /mountpoint Run "dlm_tool ls" to verify the nodes that have each fs mounted. .PP .B 8. shut down .nf umount -a -t gfs2 systemctl stop clvmd systemctl stop dlm systemctl stop corosync .fi .PP .B More setup information: .br .BR dlm_controld (8), .br .BR dlm_tool (8), .br .BR dlm.conf (5), .br .BR corosync (8), .br .BR corosync.conf (5) .br gfs2-utils/gfs2/man/gfs2_convert.80000664000175000017500000000376112110647577015656 0ustar andyandy.TH gfs2_convert 8 .SH NAME gfs2_convert - Convert a GFS1 filesystem to GFS2 .SH SYNOPSIS .B gfs2_convert [\fIOPTION\fR]... \fIDEVICE\fR .SH DESCRIPTION gfs2_convert is used to convert a filesystem from GFS1 to GFS2. It is required that the GFS1 filesystem be checked and fixed for errors using \fBfsck.gfs2\fP and that the filesystem be backed up before attempting to convert it. The convert process is irreversible and any error encountered during the conversion can result in the abrupt termination of the program and consequently an unusable filesystem. See \fBNOTES\fP section below for more information. .SH OPTIONS .TP \fB-h\fP Help. This prints out the proper command line usage syntax. .TP \fB-q\fP Quiet. Print less information while running. .TP \fB-n\fP No to all questions. .TP \fB-V\fP Print program Version information only. Print out the current version name. .TP \fB-v\fP Verbose operation. Print more information while running. .TP \fB-y\fP Yes to all questions. By specifying this option, gfs2_convert will not prompt before making changes. .SH EXAMPLE .TP gfs2_convert /dev/vg0/lvol0 This will convert the Global File System on the block device "/dev/vg0/lvol0" to gfs2 format. .SH NOTES If gfs2_convert is interrupted for some reason other than a conversion failure, DO NOT run \fBfsck.gfs2\fP on this partially converted filesystem. When this occurs, reissue the gfs2_convert command on the partially converted filesystem to complete the conversion process. 
The GFS2 filesystem does not support Context-Dependent Path Names (CDPNs). gfs2_convert identifies such CDPNs and replaces them with empty directories with the same name. The administrator can use bind mounts on these directories to get the same effect as CDPNs. When converting full or nearly full filesystems, it is possible that there won't be enough space available to fit all the GFS2 filesystem data structures. In such cases, the size of all the journals is reduced uniformly such that everything fits in the available space. gfs2-utils/gfs2/man/gfs2_edit.80000664000175000017500000004037012110647577015120 0ustar andyandy.TH gfs2_edit 8 .SH NAME gfs2_edit - Display, print or edit GFS2 or GFS internal structures. .SH SYNOPSIS .B gfs2_edit [\fIOPTION\fR]... [\fIDEVICE\fR] .SH DESCRIPTION The gfs2_edit command is a tool used to examine, edit or display internal data structures of a GFS2 or GFS file system. The gfs2_edit command can be run interactively, as described below in INTERACTIVE MODE. Caution: Several options of the gfs2_edit command alter the file system metadata and can cause file system corruption. These options should be used with great care. .SH OPTIONS .TP \fB-p\fP [\fIstruct\fR | \fIblock\fR] [\fIblocktype\fR] [\fIblockalloc [val]\fR] [\fIblockbits\fR] [\fIblockrg\fR] [\fIfind sb|rg|rb|di|in|lf|jd|lh|ld|ea|ed|lb|13|qc\fR] [\fIfield [val]\fR] Print a gfs2 data structure in human-readable format to stdout. You can enter either a block number or a data structure name. Block numbers may be specified in hex (e.g., 0x10) or decimal (e.g., 16). You can specify the following well-known locations with the -p option. \fIsb\fR, \fIsuperblock\fR - Print the superblock. \fIroot\fR - Print the root directory. \fImaster\fR - Print the master system directory. \fIjindex\fR - Print the journal index system directory. \fIper_node\fR - Print the per_node system directory. \fIinum\fR - Print the system inum file. \fIstatfs\fR - Print the system statfs file. 
\fIrindex\fR, \fIrgindex\fR - Print the resource group index system file. \fIrg X\fR - Print the resource group information for RG X (zero-based). \fIrgs\fR - Print the resource group information. \fIquota\fR - Print the contents of the system quota file. \fIidentify\fR - Identify a data block rather than print the block's contents. \fIsize\fR - Print the device size information. \fIjournalX\fR - Print the contents of journal X, where X is a journal number from 0 to . Only the journal headers and journal descriptors are dumped. For journal descriptors, this option prints out every file system block number logged in that section of the journal. The actual journaled blocks are not printed. If you specify a block number rather than a structure name, gfs2_edit will print out a breakdown of the structure for that block. For example: \fBgfs2_edit -p sb\fP will print the superblock, but so does \fBgfs2_edit -p 0x10\fP and \fBgfs2_edit -p 16\fP. If you specify -p without a block or structure name, gfs2_edit prints the superblock. You can specify more than one data structure with a single -p option. For example, \fBgfs2_edit -p inum statfs /dev/sda1\fP prints the system inum file and the system statfs file on /dev/sda1. Optionally, you may specify the keyword \fIblocktype\fR to print out the gfs2 block type for the specified block. Valid gfs2 block types are: 0 (Clump), 1 (Superblock), 2 (Resource Group Header), 3 (Resource Group Bitmap), 4 (Dinode), 5 (Indirect Block), 6 (Leaf), 7 (Journaled data), 8 (Log Header), 9 (Log descriptor), 10 (Extended attribute), 11 (Eattr Data), 12 (Log Buffer), 13 (Invalid), and 14 (Quota Change). Optionally, you may specify the keyword \fIblockalloc\fR with an optional value to assign. If no value is specified, the blockalloc keyword will print the block allocation type for the specified block. Valid block allocation types are: 0 (Free block), 1 (Data block), 2 (Unlinked block), and 3 (Metadata block). 
If a value from 0 to 3 is specified, the resource group bitmap will be changed to the new value. This may be used, for example, to artificially free or allocate a block in order to test fsck.gfs2's ability to detect and fix the problem. Optionally, you may specify the keyword \fIblockbits\fR. This option will locate and print the block containing the bitmap corresponding to the specified block. Optionally, you may specify the keyword \fIblockrg\fR. This option will locate and print the block number of the resource group that holds information about the specified block. You may also use gfs2_edit to find the next occurrence of a metadata block of a certain type. Valid metadata types are: \fInone\fR (unused metadata clump block), \fIsb\fR (superblock), \fIrg\fR (resource group), \fIrb\fR (rg bitmap), \fIdi\fR (disk inode aka dinode), \fIin\fR (indirect block list), \fIlf\fR (directory leaf), \fIjd\fR (journaled data), \fIlh\fR (journal log header), \fIld\fR (journal log descriptor), \fIea\fR (extended attribute), \fIed\fR (ea data block), \fIlb\fR (log buffer), \fI13\fR (unused block type 13), \fIqc\fR (quota change). The block AFTER the one specified with -p is the starting point for the search. For example, if you specify \fBgfs2_edit -p rg 12 find rg /dev/your/device\fP, it will find the rg that follows rg 12 (normally, this would be rg 13). Note, however, that since metadata often appears in the journals, it could be a copy of a different RG, inside a journal. Also note that gfs2_edit will only find \fBallocated\fR metadata blocks unless the type specified is none, sb, rg or rb. In other words, if you try to find a disk inode, it will only find an allocated dinode, not a deallocated one. Optionally, you may specify the keyword \fIfield\fR followed by a valid metadata field name. Right now, only the fields in disk inodes and resource groups are allowed. If no value is specified after the field, the value of the field will be printed to stdout. 
If a value is specified, the field's value will be changed. This may be used, for example, to artificially change the di_size field for an inode in order to test fsck.gfs2's ability to detect and fix the problem. .TP \fB-s\fP [\fIstructure\fR | \fIblock\fR] Specify a starting block for interactive mode. Any of the well-known locations found in the -p option may be specified. If you want to start on a particular resource group, specify it in quotes, e.g. -s "rg 3" .TP \fB-h, -help, -usage\fP Print help information. .TP \fB-c\fP [\fI0\fR | \fI1\fR] Use alternate color scheme for interactive mode: 0=normal (dark colors on white background), or 1 (light colors on black background). .TP \fB-V\fP Print program version information only. .TP \fB-x\fP Print in hex mode. .TP \fB-z <0-9>\fP Compress metadata with gzip compression level 1 to 9 (default 9). 0 means no compression at all. .TP \fBrg\fP \fI<rg>\fR \fI<device>\fR Print the contents of Resource Group \fI<rg>\fR on \fI<device>\fR. \fI<rg>\fR is a number from 0 to X - 1, where X is the number of RGs. .TP \fBrgcount\fP \fI<device>\fR Print the number of Resource Groups in the file system on \fI<device>\fR. .TP \fBrgflags\fP \fI<rg>\fR [\fInew_value\fR] \fI<device>\fR Print and/or modify the rg_flags value of Resource Group \fI<rg>\fR on \fI<device>\fR. \fI<rg>\fR is a number from 0 to X - 1, where X is the number of RGs. If \fInew_value\fR is not specified, the current rg_flags value will be printed but not modified. If \fInew_value\fR is specified, the rg_flags field will be overwritten with the new value. .TP \fBprintsavedmeta\fP \fI<filename.gz>\fR Print off a list of blocks from <filename.gz> that were saved with the savemeta option. .TP \fBsavemeta\fP \fI<device>\fR \fI<filename.gz>\fR Save off the GFS2 metadata (not user data) for the file system on the specified device to a file given by <filename>. You can use this option to analyze file system problems without revealing sensitive information that may be contained in the files. 
This option works quickly by using the system bitmap blocks in the resource groups to determine the location of all the metadata. If there is corruption in the bitmaps, resource groups or rindex file, this method may fail and you may need to use the savemetaslow option. The destination file is compressed using gzip unless -z 0 is specified. .TP \fBsavemetaslow\fP \fIdevice\fR \fIfilename\fR Save off GFS2 metadata, as with the savemeta option, examining every block in the file system for metadata. This option is less prone to failure due to file system corruption than the savemeta option, but it is extremely slow. The destination file is compressed using gzip unless -z 0 is specified. .TP \fBsavergs\fP \fIdevice\fR \fIfilename\fR Save off only the GFS2 resource group metadata for the file system on the specified device to a file given by \fIfilename\fR. The destination file is compressed using gzip unless -z 0 is specified. .TP \fBrestoremeta\fP \fIfilename\fR \fIdest_device\fR Take a compressed or uncompressed file created with the savemeta option and restore its contents on top of the specified destination device. \fBWARNING\fP: When you use this option, the file system and all data on the destination device are destroyed. Since only metadata (but no data) is restored, every file in the resulting file system is likely to be corrupt. The ONLY purpose of this option is to examine and debug file system problems by restoring and examining the state of the saved metadata. If the destination file system is the same size or larger than the source file system where the metadata was saved, the resulting file system will be the same size as the source. If the destination device is smaller than the source file system, gfs2_edit will restore as much as it can, then quit, leaving you with a file system that probably will not mount, but from which you might still be able to figure out what is wrong with the source file system. 
.SH INTERACTIVE MODE If you specify a device on the gfs2_edit command line and you specify no options other than -c, gfs2_edit will act as an interactive GFS2 file system editor for the file system you specify. There are three display modes: hex mode, structure mode and pointers mode. You use the m key to switch between the modes, as described below. The modes are as follows: .TP Hex mode (default) Display or edit blocks of the file system in hexadecimal and ASCII. Lines at the top indicate the currently displayed block in both hex and decimal. If the block contains a GFS2 data structure, the name of that structure will appear in the upper right corner of the display. If the block is a well-known block, such as the superblock or rindex, there will be a line to indicate what it is. In hex mode, you can edit blocks by pressing \fBEnter\fP and entering hexadecimal digits to replace the highlighted hex digits. Do NOT precede the numbers with "0x". For example, if you want to change the value at offset 0x60 from a 0x12 to 0xef, position your cursor to offset 0x60, so that the 12 is highlighted, then press \fBEnter\fP and type in "ef". Press \fBEsc\fP or \fBEnter\fP to exit edit mode. In hex mode, different colors indicate different things. For example, in the default color scheme, the GFS2 data structure will be black, data offsets will be light blue, and actual data (anything after the gfs2 data structure) will be red. .TP Structure mode Decode the file system block into its GFS2 structure and display the values of that structure. This mode is most useful for jumping around the file system. For example, you can use the arrow keys to position down to a pointer and press \fBJ\fP to jump to that block. .TP Pointers mode Display any additional information appearing on the block. For example, if an inode has block pointers, this will display them and allow you to scroll through them. You can also position to one of them and press \fBJ\fP to jump to that block. 
.SH Interactive mode command keys: .TP \fBq\fP or \fBEsc\fP The \fBq\fP or \fBEsc\fP keys are used to exit gfs2_edit. .TP \fBArrow keys\fP up, down, right, left, pg-up, pg-down, home, end The arrow keys are used to highlight an area of the display. The \fBJ\fP key may be used to jump to the block that is highlighted. .TP \fBm\fP - Mode switch The \fBm\fP key is used to switch between the three display modes. The initial mode is hex mode. Pressing the \fBm\fP key once switches to structure mode. Pressing it a second time switches from structure mode to pointers mode. Pressing it a third time takes you back to hex mode again. .TP \fBj\fP - Jump to block The \fBj\fP key jumps to the block number that is currently highlighted. In hex mode, hitting J will work when any byte of the pointer is highlighted. .TP \fBg\fP - Goto block The \fBg\fP key asks for a block number, then jumps there. Note that in many cases, you can also arrow up so that the current block number is highlighted, then press \fBEnter\fP to enter a block number to jump to. .TP \fBh\fP - Help display The \fBh\fP key causes the interactive help display to be shown. .TP \fBe\fP - Extended mode The \fBe\fP key causes gfs2_edit to switch to extended ("pointers") mode. .TP \fBc\fP - Color scheme The \fBc\fP key causes gfs2_edit to switch to its alternate color scheme. .TP \fBf\fP - Forward block The \fBf\fP key causes you to scroll forward one block. This does not affect the "jump" status. In other words, if you use the \fBf\fP key to move forward several blocks, pressing \fBBackspace\fP will not roll you back up. .TP \fBEnter\fP - Edit value The \fBEnter\fP key causes you to go from display mode to edit mode. If you are in hex mode and you hit enter, you can type new hex values at the cursor's current location. Note: pressing \fBEnter\fP in structure mode allows you to enter a new value, with the following restrictions: For gfs2 disk inodes and resource groups, it will actually change the value on disk. However, inode numbers may not be changed. 
For all other structures, the values entered are ignored. If you use the up arrow key to highlight the block number, then press \fBEnter\fP, you may then enter a new block number, or any of the well-known block locations listed above (e.g. sb, rindex, inum, rg 17, etc.) and gfs2_edit will jump to the block specified. If you specify a slash character followed by a metadata type, gfs2_edit will search for the next occurrence of that metadata block type, and jump there. It will take you to block 0 if it does not find any more blocks of the specified metadata type. .TP \fBHome\fP If you are in pointers mode, this takes you back to the start of the pointers you are viewing. Otherwise it takes you back to the superblock. .TP \fBBackspace\fP This takes you back to the block you were displaying before a jump. .TP \fBSpace\fP This takes you forward to the block you were displaying when you hit \fBBackspace\fP. .SH EXAMPLES .TP gfs2_edit /dev/roth_vg/roth_lv Display and optionally edit the file system on /dev/roth_vg/roth_lv .TP gfs2_edit -p sb /dev/vg0/lvol0 Print the superblock of the gfs2 file system located on /dev/vg0/lvol0. .TP gfs2_edit -p identify 2746 2748 /dev/sda2 Print out what kind of blocks are at block numbers 2746 and 2748 on device /dev/sda2. .TP gfs2_edit -p rindex /dev/sda1 Print the resource group index system file located on device /dev/sda1. .TP gfs2_edit savemeta /dev/sda1 /tmp/our_fs Save off all metadata (but no user data) to file /tmp/our_fs. .TP gfs2_edit -p root /dev/my_vg/my_lv Print the contents of the root directory in /dev/my_vg/my_lv. .TP gfs2_edit -x -p 0x3f7a /dev/sda1 Print the contents of block 16250 of /dev/sda1 in hex. .TP gfs2_edit -p 12345 /dev/sdc2 Print the gfs2 data structure at block 12345. .TP gfs2_edit rgcount /dev/sdb1 Print how many Resource Groups exist for /dev/sdb1. .TP gfs2_edit -p rg 17 /dev/sdb1 Print the contents of the eighteenth Resource Group on /dev/sdb1. 
.TP gfs2_edit rgflags 3 /dev/sdb1 Print the rg_flags value for the fourth Resource Group on /dev/sdb1. .TP gfs2_edit rgflags 3 8 /dev/sdb1 Set the GFS2_RGF_NOALLOC flag on for the fourth Resource Group on /dev/sdb1. .TP gfs2_edit -p 25 blockalloc /dev/roth_vg/roth_lv Print the block allocation type of block 25. May produce this output: 3 (Metadata) .TP gfs2_edit -p 25 blockalloc 1 /dev/roth_vg/roth_lv Change the block allocation type of block 25 to data. May produce this output: 1 .TP gfs2_edit -p 25 blocktype /dev/roth_vg/roth_lv Print the metadata block type of block 25. May produce this output: 4 (Block 25 is type 4: Dinode) .TP gfs2_edit -p 25 field di_size /dev/roth_vg/roth_lv Print the di_size field of block 25. May produce this output: 134217728 .TP gfs2_edit -x -p 25 field di_size /dev/roth_vg/roth_lv Print the di_size field of block 25, in hexadecimal. May produce this output: 0x8000000 .TP gfs2_edit -p 25 field di_size 0x4000 /dev/roth_vg/roth_lv Change the di_size field of block 25 to the hexadecimal value 0x4000. May produce this output: 16384 .SH KNOWN BUGS .TP The directory code does not work well. It might be confused by directory "sentinel" entries. gfs2-utils/gfs2/man/gfs2_grow.80000664000175000017500000000451512110647577015152 0ustar andyandy.TH gfs2_grow 8 .SH NAME gfs2_grow - Expand a GFS2 filesystem .SH SYNOPSIS .B gfs2_grow [\fIOPTION\fR]... <\fIDEVICE\fR|\fIMOUNTPOINT\fR>... .SH DESCRIPTION gfs2_grow is used to expand a GFS2 filesystem after the device upon which the filesystem resides has also been expanded. By running gfs2_grow on a GFS2 filesystem, you are requesting that any spare space between the current end of the filesystem and the end of the device is filled with a newly initialized GFS2 filesystem extension. When this operation is complete, the resource group index for the filesystem is updated so that all nodes in the cluster can use the extra storage space that has been added. 
You may only run gfs2_grow on a mounted filesystem; expansion of unmounted filesystems is not supported. You only need to run gfs2_grow on one node in the cluster. All the other nodes will see the expansion has occurred and automatically start to use the newly available space. You must be superuser to execute \fBgfs2_grow\fP. The gfs2_grow tool tries to prevent you from corrupting your filesystem by checking as many of the likely problems as it can. When expanding a filesystem, only the last step of updating the resource index affects the currently mounted filesystem and so failure part way through the expansion process should leave your filesystem in its original unexpanded state. You can run gfs2_grow with the \fB-T\fP flag to get a display of the current state of a mounted GFS2 filesystem. The gfs2_grow tool uses the resource group (RG) size that was originally calculated when mkfs.gfs2 was done. This allows tools like fsck.gfs2 to better ensure the integrity of the file system. Since the new free space often does not lie on even boundaries based on that RG size, there may be some unused space on the device after gfs2_grow is run. .SH OPTIONS .TP \fB-D\fP Print out debugging information about the filesystem layout. .TP \fB-h\fP Prints out a short usage message and exits. .TP \fB-q\fP Be quiet. Don't print anything. .TP \fB-T\fP Test. Do all calculations, but do not write any data to the disk and do not expand the filesystem. This is used to discover what the tool would have done were it run without this flag. .TP \fB-V\fP Version. Print out version information, then exit. .SH BUGS There is no way to shrink a GFS2 filesystem. .SH SEE ALSO mkfs.gfs2(8) gfs2_jadd(8) gfs2-utils/gfs2/man/gfs2_jadd.80000664000175000017500000000351012110647577015070 0ustar andyandy.TH gfs2_jadd 8 .SH NAME gfs2_jadd \- Add journals to a GFS2 filesystem .SH SYNOPSIS .B gfs2_jadd [\fIOPTION\fR]... <\fIDEVICE\fR|\fIMOUNTPOINT\fR>... 
.SH DESCRIPTION \fIgfs2_jadd\fR is used to add journals (and a few other per-node files) to a GFS2 filesystem. When this operation is complete, the journal index is updated so that machines mounting the filesystem at a later date will see the newly created journals in addition to the journals already there. Machines which are already running in the cluster are unaffected. You may only run \fIgfs2_jadd\fR on a mounted filesystem; addition of journals to unmounted filesystems is not supported. You only need to run \fIgfs2_jadd\fR on one node in the cluster. All the other nodes will see the expansion has occurred when required. You must be superuser to execute \fIgfs2_jadd\fR. The \fIgfs2_jadd\fR tool tries to prevent you from corrupting your filesystem by checking as many of the likely problems as it can. When growing a filesystem, only the last step of updating the journal index affects the currently mounted filesystem and so failure part way through the expansion process should leave your filesystem in its original state. .SH OPTIONS .TP \fB-c MegaBytes\fP Initial size of each journal's quota change file .TP \fB-D\fP Print out debugging information about the filesystem layout. .TP \fB-h\fP Prints out a short usage message and exits. .TP \fB-J size\fP The size of the new journals in megabytes. The default is 32MB (the minimum size allowed is 8MB). If you want to add journals of different sizes to the filesystem, you'll need to run gfs2_jadd once for each different size of journal. .TP \fB-j num\fP The number of new journals to add. .TP \fB-q\fP Be quiet. Don't print anything. .TP \fB-V\fP Version. Print version information, then exit. . .SH SEE ALSO mkfs.gfs2(8) gfs2_grow(8) gfs2-utils/gfs2/man/gfs2_lockcapture.80000664000175000017500000000532012154127656016502 0ustar andyandy.TH gfs2_lockcapture 8 .SH NAME gfs2_lockcapture \- capture locking information from GFS2 file systems and DLM. 
.SH SYNOPSIS .B gfs2_lockcapture \fR[-dqyP] [-o \fIoutput directory]\fR [-r \fInumber of runs]\fR [-s \fIseconds to sleep]\fR [-n \fIname of GFS2 file system]\fP .PP .B gfs2_lockcapture \fR[-dqyi] .SH DESCRIPTION \fIgfs2_lockcapture\fR is used to capture the GFS2 lockdump data and corresponding DLM data for GFS2 file systems. The number of captures and their frequency can be configured. By default all of the mounted GFS2 file systems will have their data collected unless GFS2 file systems are specified. .PP Please note that sysrq -t (thread) and -m (memory) dumps and the pid directories in /proc are collected unless they are disabled with the -P option. .SH OPTIONS .TP \fB-h, --help\fP Prints out a short usage message and exits. .TP \fB-d, --debug\fP Enables debug logging. .TP \fB-q, --quiet\fP Disables logging to console. .TP \fB-y, --no_ask\fP Disables all questions and assumes yes. .TP \fB-i, --info\fP Prints information about the mounted GFS2 file systems. .TP \fB-P, --disable_process_gather\fP The gathering of process information will be disabled. .TP \fB-o \fIoutput directory, \fB--path_to_output_dir\fR=\fIoutput directory\fP The directory where all the collected data will be stored. .TP \fB-r \fInumber of runs, \fB--num_of_runs\fR=\fInumber of runs\fP The number of runs capturing the lockdump data. The default is 3 runs. .TP \fB-s \fIseconds to sleep, \fB--seconds_sleep\fR=\fIseconds to sleep\fP The number of seconds to sleep between runs of capturing the lockdump data. The default is 120 seconds. .TP \fB-n \fIname of GFS2 file system, \fB--fs_name\fR=\fIname of GFS2 file system\fP The name of the GFS2 filesystem(s) that will have their lockdump data captured. By default, all mounted GFS2 file systems will have their data captured. . 
.SH NOTES The output of the following commands will be captured: .IP \(bu 2 uname -a .IP \(bu 2 uptime .IP \(bu 2 ps h -AL -o "tid,s,cmd" .IP \(bu 2 df -h .IP \(bu 2 lsof .IP \(bu 2 mount -l .IP \(bu 2 dlm_tool ls .IP \(bu 2 dlm_tool lockdebug -v -s -w .IP \(bu 2 echo "t" > /proc/sysrq-trigger (If /proc/1/stack does not exist) .IP \(bu 2 echo "m" > /proc/sysrq-trigger (If /proc/1/stack does not exist) .SH AUTHOR .nf Shane Bradley .fi .SH FILES .I /proc/mounts .br .I /proc/slabinfo .br .I /sys/kernel/config/dlm/cluster/lkbtbl_size .br .I /sys/kernel/config/dlm/cluster/dirtbl_size .br .I /sys/kernel/config/dlm/cluster/rsbtbl_size .br .I /sys/kernel/debug/gfs2/ .br .I /sys/kernel/debug/dlm/ .br .I /proc/PID/ (If /proc/1/stack does exist) .br .I /var/log/messages .br .I /var/log/cluster/ .br .SH SEE ALSO gfs2-utils/gfs2/man/gfs2_trace.80000664000175000017500000000265412110647577015274 0ustar andyandy.TH gfs2_trace 8 .SH NAME gfs2_trace \- can enable trace events, disable trace events, and capture data from GFS2 trace events. .SH SYNOPSIS .B gfs2_trace \fR[-dqEN] [-e \fItrace event name]\fR [-n \fItrace event name]\fR [-o \fIoutput filename]\fR .PP .SH DESCRIPTION \fIgfs2_trace\fR can enable and disable all trace events or selected trace events. \fIgfs2_trace\fR can capture the output of the trace events and write the output to a file. When capturing trace events, the script will exit when control-c is pressed. The trace events will then be written to the selected file. .PP .SH OPTIONS .TP \fB-h, --help\fP Prints out a short usage message and exits. .TP \fB-d, --debug\fP enables debug logging. .TP \fB-q, --quiet\fP disables logging to console. 
.TP \fB-l, --list\fP lists the enabled state and filters for the GFS2 trace events .TP \fB-E, --enable_all_trace_events\fP enables all trace_events for GFS2 .TP \fB-e \fItrace event name, \fB--enable_trace_event\fR=\fItrace event name\fP selected trace_events that will be enabled for GFS2 .TP \fB-N, --disable_all_trace_events\fP disables all trace_events for GFS2 .TP \fB-n \fItrace event name, \fB--disable_trace_event\fR=\fItrace event name\fP selected trace_events that will be disabled for GFS2 .TP \fB-c \fIoutput filename, \fB--capture\fR=\fIoutput filename\fP enables capturing of trace events and will save the data to a file . .SH SEE ALSO gfs2-utils/gfs2/man/mkfs.gfs2.80000664000175000017500000001115212154134764015043 0ustar andyandy.TH mkfs.gfs2 8 .SH NAME mkfs.gfs2 - Make a GFS2 filesystem .SH SYNOPSIS .B mkfs.gfs2 [\fIOPTION\fR]... \fIDEVICE\fR \fI[ block-count ]\fR .SH DESCRIPTION mkfs.gfs2 is used to create a Global File System. .SH OPTIONS .TP \fB-b\fP \fIBlockSize\fR Set the filesystem block size to \fIBlockSize\fR (must be a power of two). The minimum block size is 512. The FS block size cannot exceed the machine's memory page size. On most architectures (i386, x86_64, s390, s390x), the memory page size is 4096 bytes. On other architectures it may be bigger. The default block size is 4096 bytes. In general, GFS2 filesystems should not deviate from the default value. .TP \fB-c\fP \fIMegaBytes\fR Initial size of each journal's quota change file .TP \fB-D\fP Enable debugging output. .TP \fB-h\fP Print out a help message describing available options, then exit. .TP \fB-J\fP \fIMegaBytes\fR The size of the journals in Megabytes. The default journal size is 128 megabytes. The minimum size is 8 megabytes. .TP \fB-j\fP \fINumber\fR The number of journals for mkfs.gfs2 to create. You need at least one journal per machine that will mount the filesystem. If this option is not specified, one journal will be created. 
.TP \fB-K\fP Keep, do not attempt to discard blocks at mkfs time (discarding blocks initially is useful on solid state devices and sparse / thin-provisioned storage). .TP \fB-O\fP This option prevents mkfs.gfs2 from asking for confirmation before writing the filesystem. .TP \fB-o\fP Specify extended options. Multiple options can be separated by commas. Valid extended options are: .RS 1.0i .TP .BI help Display an extended options help summary, then exit. .TP .BI sunit= bytes This is used to specify the stripe unit for a RAID device or striped logical volume. This option ensures that resource groups will be stripe unit aligned and overrides the stripe unit value obtained by probing the device. This value must be a multiple of the file system block size and must be specified with the .I swidth option. .TP .BI swidth= bytes This is used to specify the stripe width for a RAID device or striped logical volume. This option ensures that resource groups will be stripe aligned and overrides the stripe width value obtained by probing the device. This value must be a multiple of the .I sunit option and must also be specified with it. .TP .BI align= [0|1] Disable or enable the alignment of resource groups. The default behaviour is to align resource groups to the stripe width and stripe unit values obtained from probing the device or specified with the .I swidth and .I sunit extended options. .RE .TP \fB-p\fP \fILockProtoName\fR LockProtoName is the name of the locking protocol to use. Acceptable locking protocols are \fIlock_dlm\fR (for shared storage) or if you are using GFS2 as a local filesystem (\fB1 node only\fP), you can specify the \fIlock_nolock\fR protocol. If this option is not specified, \fIlock_dlm\fR protocol will be assumed. .TP \fB-q\fP Be quiet. Don't print anything. .TP \fB-r\fP \fIMegaBytes\fR mkfs.gfs2 will try to make Resource Groups about this big. Minimum RG size is 32 MB. Maximum RG size is 2048 MB. 
A large RG size may increase performance on very large file systems. If not specified, mkfs.gfs2 will choose the RG size based on the size of the file system: average size file systems will have 256 MB RGs, and bigger file systems will have bigger RGs for better performance. .TP \fB-t\fP \fILockTableName\fR The lock table field appropriate to the lock module you're using. It is \fIclustername:fsname\fR. Clustername must match that in cluster.conf; only members of this cluster are permitted to use this file system. Fsname is a unique file system name used to distinguish this GFS2 file system from others created (1 to 16 characters). Lock_nolock doesn't use this field. Valid \fIclustername\fRs and \fIfsname\fRs may only contain alphanumeric characters, hyphens (-) and underscores (_). .TP \fB-V\fP Print program version information, then exit. .TP [ \fIblock-count\fR ] Make the file system this many blocks in size. If not specified, the entire length of the specified device is used. .SH EXAMPLE .TP mkfs.gfs2 -t mycluster:mygfs2 -p lock_dlm -j 2 /dev/vg0/mygfs2 This will make a Global File System on the block device "/dev/vg0/mygfs2". It will belong to "mycluster" and register itself as wanting locking for "mygfs2". It will use DLM for locking and make two journals. .TP mkfs.gfs2 -t mycluster:mygfs2 -p lock_nolock -j 3 /dev/vg0/mygfs2 This will make a Global File System on the block device "/dev/vg0/mygfs2". It will belong to "mycluster" but have no cluster locking. It will have three journals. gfs2-utils/gfs2/man/target.mk0000664000175000017500000000002512110647577014771 0ustar andyandy manpages += *.[0-9] gfs2-utils/gfs2/man/tunegfs2.80000664000175000017500000000225212110647577015004 0ustar andyandy.TH tunegfs2 8 .SH NAME tunegfs2 - View and manipulate gfs2 superblocks .SH SYNOPSIS .B tunegfs2 [\fIOPTIONS\fR] /dev/blockdevice .SH DESCRIPTION tunegfs2 allows viewing and manipulating the values contained in a GFS or GFS2 superblock. 
It is able to modify the \fIUUID\fR (on GFS2 only), \fIlabel\fR, \fIlockproto\fR and \fIlocktable\fR. The values in the GFS2 superblock are read only on mount. Any changes on a live filesystem will not take effect until the next time it is mounted. Making changes on a live filesystem is not recommended for this reason. .SH OPTIONS .TP \fB-h\fP Prints out usage information for this command. .TP \fB-l\fP List contents of the filesystem superblock. Includes the current values of the parameters that can be set by this program. .TP \fB-L\fP \fI