pax_global_header00006660000000000000000000000064131565264770014532gustar00rootroot0000000000000052 comment=22e1cd13270f6e29a8d2d1af03dfeceecf515a89 COPYING000066400000000000000000000431061315652647700121350ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. 
And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. 
The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. 
(Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. 
The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. 
If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. Makefile000066400000000000000000000120551315652647700125410ustar00rootroot00000000000000 # Copyright (C) 2005-2017 Junjiro R. Okajima # # This program, aufs is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA HOSTCC ?= cc override CPPFLAGS += -D_GNU_SOURCE override CPPFLAGS += -I./libau override CPPFLAGS += -DAUFHSM_CMD=\"/usr/bin/aufhsm\" override CFLAGS += -O -Wall INSTALL ?= install Install = ${INSTALL} -o root -g root -p ManDir = /usr/share/man # # MountCmd: full path for mount(8) # UmountCmd: full path for umount(8) # MountCmd=/bin/mount UmountCmd=/bin/umount override CPPFLAGS += -DMOUNT_CMD=\"${MountCmd}\" override CPPFLAGS += -DUMOUNT_CMD=\"${UmountCmd}\" # # BuildFHSM: specify building FHSM tools # BuildFHSM = no ifeq (${BuildFHSM},yes) override CPPFLAGS += -DAUFHSM LibUtilObj = mng_fhsm.o define MakeFHSM ${MAKE} -C fhsm ${1} endef else # empty define MakeFHSM @: endef endif LibUtil = libautil.a LibUtilObj += perror.o proc_mnt.o br.o plink.o mtab.o LibUtilHdr = au_util.h TopDir = ${CURDIR} LastTestGlibc = test_glibc # don't use -q for fgrep here since it exits when the string is found, # and it causes the broken pipe error. 
define test_glibc $(shell cat ${LastTestGlibc} 2> /dev/null || \ { ${1} ${CPPFLAGS} -I ${TopDir}/extlib/non-glibc -E -P -dM ${2} |\ fgrep -w __GNU_LIBRARY__ > /dev/null && \ echo yes || \ echo no; } |\ tee ${LastTestGlibc}) endef $(filter-out clean, ${MAKECMDGOALS} all): \ $(eval Glibc=$(call test_glibc, ${CC}, ver.c)) #$(warning Glibc=${Glibc}) LibAuDir ?= /usr/lib ExtlibGlibcObj = au_nftw.o ExtlibNonGlibcObj = ${ExtlibGlibcObj} au_decode_mntpnt.o error_at_line.o ExtlibPath = extlib/glibc ExtlibObj = ${ExtlibGlibcObj} ifeq (${Glibc},no) ExtlibPath = extlib/non-glibc ExtlibObj = ${ExtlibNonGlibcObj} LibUtilHdr += ${ExtlibPath}/error_at_line.h override CPPFLAGS += -I${CURDIR}/${ExtlibPath} endif LibUtilObj += ${ExtlibObj} Cmd = aubusy auchk aubrsync Man = aufs.5 Etc = etc_default_aufs Bin = auibusy aumvdown auplink mount.aufs umount.aufs #auctl BinObj = $(addsuffix .o, ${Bin}) ifeq (${Glibc},no) AuplinkFtwCmd=/sbin/auplink_ftw override CPPFLAGS += -DAUPLINK_FTW_CMD=\"${AuplinkFtwCmd}\" Cmd += auplink_ftw endif # suppress 'eval' for ${v} $(foreach v, CC CPPFLAGS CFLAGS INSTALL Install ManDir TopDir LibUtilHdr \ Glibc LibAuDir ExtlibPath, \ $(eval MAKE += ${v}="$${${v}}")) all: ver_test ${Man} ${Bin} ${Etc} ${MAKE} -C libau $@ ln -sf ./libau/libau*.so . $(call MakeFHSM, $@) clean: ${RM} ${Man} ${Bin} ${Etc} ${LibUtil} libau.so* ${LastTestGlibc} *~ ${RM} ${BinObj} ${LibUtilObj} for i in $(patsubst %.o,%.c, ${ExtlibGlibcObj} ${ExtlibNonGlibcObj}); \ do test -L $${i} && ${RM} $${i} || :; \ done ${MAKE} -C libau $@ $(call MakeFHSM, $@) ver_test: ver ./ver ${Bin}: override LDFLAGS += -static -s ${Bin}: LDLIBS = -L. 
-lautil ${BinObj}: %.o: %.c ${LibUtilHdr} ${LibUtil} ${LibUtilObj}: %.o: %.c ${LibUtilHdr} #${LibUtil}: ${LibUtil}(${LibUtilObj}) ${LibUtil}: $(foreach o, ${LibUtilObj}, ${LibUtil}(${o})) .NOTPARALLEL: ${LibUtil} ExtlibSrc = $(patsubst %.o,%.c, ${ExtlibObj}) ${ExtlibSrc}: %: ${ExtlibPath}/% ln -sf $< $@ .INTERMEDIATE: ${ExtlibSrc} ${ExtlibObj}: CPPFLAGS += -I${CURDIR} etc_default_aufs: c2sh aufs.shlib ${RM} $@ echo '# aufs variables for shell scripts' > $@ ./c2sh >> $@ echo >> $@ sed -e '0,/^$$/d' aufs.shlib >> $@ aufs.5: aufs.in.5 c2tmac ${RM} $@ ./c2tmac > $@ awk '{ \ gsub(/\140[^\047]*\047/, "\\[oq]&\\[cq]"); \ gsub(/\\\[oq\]\140/, "\\[oq]"); \ gsub(/\047\\\[cq\]/, "\\[cq]"); \ gsub(/\047/, "\\[aq]"); \ print; \ }' aufs.in.5 >> $@ chmod a-w $@ c2sh c2tmac ver: CC = ${HOSTCC} .INTERMEDIATE: c2sh c2tmac ver install_sbin: File = auibusy aumvdown auplink mount.aufs umount.aufs ifeq (${Glibc},no) install_sbin: File += auplink_ftw endif install_sbin: Tgt = ${DESTDIR}/sbin install_ubin: File = aubusy auchk aubrsync #auctl install_ubin: Tgt = ${DESTDIR}/usr/bin install_sbin install_ubin: ${File} ${INSTALL} -d ${Tgt} ${Install} -m 755 ${File} ${Tgt} install_etc: File = etc_default_aufs install_etc: Tgt = ${DESTDIR}/etc/default/aufs install_etc: ${File} ${INSTALL} -d $(dir ${Tgt}) ${Install} -m 644 -T ${File} ${Tgt} install_man5: File = aufs.5 install_man5: Tgt = ${DESTDIR}${ManDir}/man5 install_man8: File = aumvdown.8 install_man8: Tgt = ${DESTDIR}${ManDir}/man8 install_man5 install_man8: ${File} ${INSTALL} -d ${Tgt} ${Install} -m 644 ${File} ${Tgt} install_man: install_man5 install_man8 install_ulib: ${MAKE} -C libau $@ install: all install_man install_sbin install_ubin install_etc install_ulib $(call MakeFHSM, $@) -include priv.mk README000066400000000000000000000106641315652647700117650ustar00rootroot00000000000000 Utilities for aufs http://aufs.sf.net Junjiro R. Okajima These utilities are always necessary for aufs. 
If you forget to install them, your aufs may not work correctly. And these are not for aufs1 essentially, except aubrsync. See below in detail. You will find GIT branches whose name is in form of "aufs4.x" where "x" represents the linux kernel version, "linux-4.x". For instance, "aufs4.0" is for linux-4.0. You may not be able to find the GIT branch in aufs-util for your version. In this case, you should git-checkout the branch for the nearest lower number. If you are using linux-4.10 and aufs4.10 (which are not released yet), but the "aufs4.10" branch doesn't exist in this repository, then "aufs4.9", "aufs4.8", ... or something is the branch for you. Also you can view all branches by $ git branch -a Note: The 4.x-rcN branch is to be used with `rc' kernel versions ONLY. The minor version number, 'x' in '4.x', of aufs may not always follow the minor version number of the kernel, because changes in the kernel that cause the use of a new minor version number do not always require changes to aufs-util. Makefile in this tree has some customizable make-variables. - CPPFLAGS specify the include path if necessary. Some of linux kernel header files are necessary including linux/aufs_type.h. If you have installed kernel header files to /usr/include, then you may not need to specify CPPFLAGS. Otherwise you need to do something like this sample. + run "make headers_install" in your kernel source tree, and $LinuxSrc/usr/include is created and header files are installed there by default. + if you build aufs4 as module from aufs4-standalone.git, then $AufsStdalone/usr/include is created and header files are installed there. + and specify CPPFLAGS="-I $LinuxSrc/usr/include -I $AufsStdalone/usr/include" Note that your distribution may contain an obsolete version of aufs_type.h in /usr/include/linux or something. When you build aufs utilities, make sure that your compiler refers to the correct aufs header file which is built by "make headers_install." 
- DESTDIR specify your install path if necessary. Some commands have to be installed under /sbin. - HOSTCC specify the compiler on host side when cross-compiling, otherwise you can ignore this make-variable. For cross-compiling, use CC for cross-compiler (target side) and HOSTCC for native (host side) compiler, since this package requires building and running some small internal tools. - BuildFHSM specify "yes" if you want FHSM tools or if you enabled CONFIG_AUFS_FHSM when you built your aufs kernel module. The default is BuildFHSM=no. $ make BuildFHSM=yes - LibAuDir specify the directory path to install the shared object library libau.so. The default is /usr/lib. This make variable will be useful for a system that has both glibc and musl-libc, or a system that has both x86_64 and i686 libraries. - MountCmd, UmountCmd specify mount(8) and umount(8) in full path. By default, they are "/bin/mount" and "/bin/umount" respectively. o /sbin/mount.aufs, /sbin/umount.aufs Helpers for util-linux-ng package. You should NOT invoke them manually. Just install them by "make install". o /sbin/auplink Handles aufs pseudo-link at remount/unmount time. You can invoke it manually at any time. o /sbin/aumvdown Operates aufs internal feature "move-down" (opposite of "copy-up"). See aumvdown.8 for details. o /usr/bin/aubusy Prints PIDs which make the branch busy and un-removable. It runs /sbin/auibusy internally. o /usr/bin/auchk Similar to generic fsck. Checks whether a branch is healthy or not from aufs's point of view. o /usr/bin/aubrsync Move files from the upper writable branch to the lower branch. If you use this script with aufs1, then you need to install aufs.shlib to /usr/lib/aufs.shlib. Currently only the 20080211 version is tested for aufs1. The development of this script is sponsored by ASUSTek Computer Inc. (http://www.asus.com/). Kindly they agreed that I keep my aufs work as free software as it has been. o /etc/default/aufs A library for shell scripts. 
o /usr/lib/libau.so A dynamic link library for "readdir(3) in user-space". If you need this , then run "make libau.so install_ulib". And refer to the aufs manual in detail. Contributions - Michael S. Zick wrote good examples, and made them public at http://hg.minimodding.com/repos/aufs/ # Local variables: ; # mode: text; # End: ; au_util.h000066400000000000000000000104471315652647700127170ustar00rootroot00000000000000/* * Copyright (C) 2005-2017 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __AUFS_UTIL_H__ #define __AUFS_UTIL_H__ #include #include #ifdef __GNU_LIBRARY__ #include static inline char *au_decode_mntpnt(char *src, char *dst, int len) { return src; } #else #include "error_at_line.h" char *au_decode_mntpnt(char *src, char *dst, int len); #endif #define AuRelease "20170918" #ifdef AUFHSM #define AuFhsmStr " with FHSM" #else #define AuFhsmStr "" #endif #define AuVersionGitBranch "aufs4.9" #define AuVersion "aufs-util for " AuVersionGitBranch AuFhsmStr " " AuRelease #define DROPLVL "droplvl" #define DROPLVL1 "noatime,dirperm1,udba=none,nodirren" #define DROPLVL1R "relatime,nodirperm1,udba=reval" #define DROPLVL2 "notrunc_xino,notrunc_xib" /* #define DROPLVL2R "trunc_xino,trunc_xib" */ #define DROPLVL2R DROPLVL2 #define DROPLVL3 "noplink,noxino" #define DROPLVL3R "plink,xino=" 
AUFS_XINO_DEFPATH /* * error_at_line() is decleared with (__printf__, 5, 6) attribute, * and our compiler produces a warning unless args is not given. * __VA_ARGS__ does not help the attribute. */ #define AuFin(fmt, ...) do { \ if (!errno) \ errno = -1; /* unknown error */ \ error_at_line(errno, errno, __FILE__, __LINE__, fmt, \ ##__VA_ARGS__); \ } while (0) #ifdef DEBUG #define MTab "/tmp/mtab" #else #define MTab "/etc/mtab" #endif /* perror.c */ extern int au_errno; extern const char *au_errlist[]; void au_perror(const char *s); /* proc_mounts.c */ struct mntent; int au_proc_getmntent(char *mntpnt, struct mntent *rent); /* br.c */ union aufs_brinfo; int au_br(union aufs_brinfo **brinfo, int *nbr, char *root); #ifdef AUFHSM int au_nfhsm(int nbr, union aufs_brinfo *brinfo); int au_br_qsort_path(const void *_a, const void *_b); void au_br_sort_path(int nbr, union aufs_brinfo *brinfo); int au_br_bsearch_path(const void *_path, const void *_brinfo); union aufs_brinfo *au_br_search_path(char *path, int nbr, union aufs_brinfo *brinfo); #endif /* lib for plink.c */ struct ino_array { char *o; int bytes; union { char *p; ino_t *cur; }; int nino; }; int ftw_list(const char *fname, const struct stat *st, int flags, struct FTW *ftw); int ftw_cpup(const char *fname, const struct stat *st, int flags, struct FTW *ftw); #ifdef __GNU_LIBRARY__ static inline int au_nftw(const char *dirpath, int (*fn) (const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf), int nopenfd, int flags) { return nftw(dirpath, fn, nopenfd, flags); } #else #define FTW_ACTIONRETVAL 0 /* dummy */ typedef int (*__nftw_func_t)(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf); int au_nftw(const char *dirpath, int (*fn) (const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf), int nopenfd, int flags); #endif /* plink.c */ enum { AuPlink_FLUSH, AuPlink_CPUP, AuPlink_LIST }; #define AuPlinkFlag_OPEN 1UL #define AuPlinkFlag_CLOEXEC (1UL << 1) 
#define AuPlinkFlag_CLOSE (1UL << 2) extern struct ino_array ia; int au_plink(char cwd[], int cmd, unsigned int flags, int *fd); /* mtab.c */ void au_print_ent(struct mntent *ent); int au_update_mtab(char *mntpnt, int do_remount, int do_verbose); /* fhsm/fhsm.c */ #ifdef AUFHSM void mng_fhsm(char *cwd, int unmount); #else static inline void mng_fhsm(char *cwd, int unmount) { /* empty */ } #endif #define _Dpri(fmt, ...) printf("%s:%d:" fmt, \ __func__, __LINE__, ##__VA_ARGS__) #ifdef DEBUG #define Dpri(fmt, ...) _Dpri(fmt, ##__VA_ARGS__) #else #define Dpri(fmt, ...) do { } while(0) #endif #endif /* __AUFS_UTIL_H__ */ aubrsync000077500000000000000000000164551315652647700126650ustar00rootroot00000000000000#!/bin/sh # Copyright (C) 2005-2015 Junjiro R. Okajima # # This program, aufs is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # # The development of this script is sponcered by ASUSTek Computer Inc. # Kindly they agreed that I keep my aufs work as free software as it has # been. # set -eu #set -x me=$(basename $0) EEcho() # str { echo ${me}: $@ 1>&2 } f=/sbin/mount.aufs test ! -x $f && EEcho $f is not installed && exit 1 # special handling for backward compatibility. # # aufs in the donated eeepc is aufs1 20080211 without CONFIG_AUFS_COMPAT. # /etc/default/aufs was introduced in aufs1 20080922. 
# shwh/noshwh was introduced in aufs1 20080310 with CONFIG_AUFS_SHWH. # noshwh became always available regardless CONFIG_AUFS_SHWH in aufs1 20081117. noshwh=1 AUFS_VERSION=20080211 f=/etc/default/aufs if [ -s $f ] then . $f else echo ${me}: warning, broken $f, assuming aufs is $AUFS_VERSION f=/usr/lib/aufs.shlib test ! -s $f && EEcho $f is not installed && exit 1 . $f case $AUFS_VERSION in 200*) # aufs1 test $AUFS_VERSION -lt 20081117 && noshwh=0 ;; esac AUFS_SUPER_MAGIC=1635083891 AUFS_SUPER_MAGIC_HEX=0x61756673 AUFS_WH_PFX=.wh. AUFS_WH_PFX2=.wh..wh. AUFS_WH_DIROPQ=.wh..wh..opq fi ######################################## _Rsync="rsync --exclude=lost+found" Rsync="$_Rsync -aHSx --devices --specials --delete-before" Copy="$Rsync" Move="$Copy" RsyncWh="$_Rsync -ptgoHx" FindForRm() # rw { echo "find \"$1\" -xdev -depth \( \( ! -type d \( -name $AUFS_WH_DIROPQ -o ! -name ${AUFS_WH_PFX2}\* \) \) -o \( -type d ! -name ${AUFS_WH_PFX2}\* ! -wholename \"$1\" ! -wholename \"$1/lost+found\" \) \) -print0" } MoveWh() # rw ro+wh { cd "$1" find . -xdev -name ${AUFS_WH_PFX}\* ! 
-name ${AUFS_WH_PFX2}\* \ -printf '%P\n' | while read wh do f=$(echo "$wh" | sed -e ' s/^'${AUFS_WH_PFX}'// t s:/'${AUFS_WH_PFX}':/: ') test -e "$dst/$f" || echo "$wh" done | # -v $RsyncWh --files-from=- ./ "$2" cd "$OLDPWD" } copy() { $Copy $@ "$mntpnt"/ "$dst" } _move() { set +x test $hinotify -ne 1 && echo ${me}: warning, -i is not specified src_is_nfs=0 test "$(stat -f -c %T "$src")" = "nfs" && src_is_nfs=1 set $quiet $Move $@ && eval $(FindForRm "$src") | { if [ $src_is_nfs -eq 1 ] then mount -o remount "$mntpnt" # remount for NFSv4 is unreliable mount -o remount "$src" || : fi xargs -r0 rm -fr #-v } } move() { _move $@ "$mntpnt"/ "$dst" } move_with_wh() { { set +x MoveWh "$src" "$dst" set $quiet } && move --exclude=${AUFS_WH_PFX}\* } # backward compatibility move_w() { move_with_wh $@ } Usage() { t=$(FindForRm src_branch | sed -e ' s/"//g $b s/$/ \\/') cat <<- EOF $me Options move | move_with_wh | copy \\ mntpnt src_branch dst_branch [ options for rsync ] generic form: $me [ -w | --wh ] [ -i | --inotify ] Options \\ mntpnt cmd [ parameters for cmd ] Options: [ -n | --dry_run ] [ -q | --quiet ] The dst_branch must be mounted as writable. During the operation, the mntpnt is set readonly. If you are opening a file for writing on the writable branch, you need to close the file before invoking this script. The -w or --wh option requires CONFIG_AUFS_SHWH enabled. The -i or --inotify option requires CONFIG_AUFS_HINOTIFY enabled. 'copy' is a shortcut for $me mntpnt \\ $Copy mntpnt/ dst_branch 'move' is a shortcut for $me mntpnt \\ "$Move \\ mntpnt/ dst_branch && \\ $t |\\ xargs -r0 rm -fr" Note: in most cases, you will need '-i' option, and find(1) is invoked by $me only when rsync(1) succeded. 'move_with_wh' is a simple variation of 'move' which moves whiteouts separately before the actual 'move'. If you execute this script under linux-2.6.24 or earlier, the kernel may produce a harmless warning "inotify.c:NNN set_dentry_child_flags()". 
The message was already removed in linux-2.6.25. examples: - Copy and reflect all the modification (modifed files, newly created and removed ones) in the upper branch to the lower branch. This operation is for aufs which has only 2 branches, and mainly for a system shutdown script. All files on the upper branch remain. $ sudo $me copy /your/aufs /your/upper_branch /your/lower_branch - Like above (2 branches), move and reflect all modifications from upper to lower. Almost all files on the upper branch will be removed. You can still use this aufs after the operation. But the inode number may be changed. If your application which depends upon the inode number was running at that time, it may not work correctly. $ sudo $me move /your/aufs /your/upper_branch /your/lower_branch EOF # - Like above (2 branches), generate a new middle layer like a # snapshot including whiteouts and make the upper_branch almost # empty, but untouch the lower_branch. # # $ img=/hda1/a.ext2 # $ dd if=/dev/zero of=\$img bs=4k count=1k # $ mkfs -t ext2 -F \$img # $ sudo mount -o rw,loop \$img /your/new_branch # $ sudo mount -o remount,ins:1:/your/new_branch=ro+wh /your/aufs # $ sudo $me _move /your/aufs /your/upper_branch /your/lower_branch \\ # "--remove-source-files \\ # --exclude=$AUFS_WH_BASE \\ # --exclude=$AUFS_WH_PLINKDIR \\ # --exclude=$AUFS_WH_TMPDIR \\ # /your/upper_branch/ /your/new_branch; \\ # mount -o remount,ro /your/new_branch" # EOF } ######################################## wh=0 hinotify=0 quiet=-x dry_run= cmd= cmd_opts= for i do case $i in -w|--wh) wh=1;; -i|--inotify) hinotify=1;; -n|--dry_run) dry_run=echo;; -q|--quiet) quiet=+x;; -h|--help) Usage; exit 0;; --) shift; break;; *) break;; esac shift done test $# -lt 2 && Usage 1>&2 && exit 1 case "$1" in _move|move|copy|move_w|move_with_wh) test $# -lt 4 && Usage 1>&2 && exit 1 cmd=$1 SetDir mntpnt "$2" SetDir src "$3" SetDir dst "$4" shift 4 wh=0 ;; *) SetDir mntpnt "$1" cmd="$2" shift 2 ;; esac cmd_opts="$@" case $(stat -f 
-c %T "$mntpnt") in aufs|UNKNOWN*${AUFS_SUPER_MAGIC_HEX}*) ;; *) EEcho "$mntpnt" is not aufs exit 1 ;; esac cur_opts=$(MntOpts "$mntpnt") test ! "$cur_opts" && EEcho bad /proc/mounts or "$mntpnt" is not mounted && exit 1 cur_opts="udba=reval,noshwh,$cur_opts" test $noshwh -eq 0 && cur_opts=$(echo $cur_opts | sed -e 's/,noshwh//') # force flushing the pusedo-links tmp_opts="remount,ro,udba=reval,noshwh" test $noshwh -eq 0 && tmp_opts=$(echo $tmp_opts | sed -e 's/,noshwh//') test $wh -eq 1 && tmp_opts="$tmp_opts,shwh" test $hinotify -eq 1 && tmp_opts="$tmp_opts,udba=inotify" # here we go trap "$dry_run mount -o remount,$cur_opts \"$mntpnt\"" EXIT set $quiet $dry_run mount -o $tmp_opts "$mntpnt" eval "$dry_run $cmd $cmd_opts" aubusy000077500000000000000000000052271315652647700123420ustar00rootroot00000000000000#!/bin/sh # Copyright (C) 2011-2014 Junjiro R. Okajima # # This program, aufs is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA verbose=0 if [ "$1" = -v ] then verbose=1 shift fi test $# -ne 2 && { echo Invalid argument echo Usage: $(basename $0) '[-v]' aufs_mntpnt branch_path printf '\tprint PIDs which make the branch busy and un-removable.\n' printf '\t-v prints additional information, the inode number in aufs,\n' printf '\tthe branch index, and the actual inode number on that branch.\n' } 1>&2 && exit 1 set -eu f=/etc/default/aufs test ! 
-e $f && echo $f is not installed 1>&2 && exit 1 . $f mntent=$(FindMntEnt "$1") test ! "$mntent" && echo No such aufs mount point "$1" 1>&2 && exit 1 mntpnt=$(echo $mntent | cut -f2 -d' ') SetDir br "$2" mntopts=$(MntOpts "$1") si=$(echo $mntopts | sed -e 's/^.*,si=\([^,]*\),*.*$/\1/') d=/sys/fs/aufs/si_$si if [ -r $d/br0 ] then # it may be very long and shell may not be able to handle as wildcard bindex=$(find $d -name 'br*' | xargs -r grep -l "^${br}=" | xargs -r basename | cut -c3-) else bi=$(echo $mntopts | sed -e 's/^.*,br://' | tr ':' '\n' | rev | cut -f2- -d= | rev | grep -n "^${br}=" | cut -f1 -d:) # the origin in aufs is zero. bindex=$(($bi - 1)) fi ######################################## # here we go lsof -FDiR "$mntpnt" | awk -v g_ppid=$$ \ -v g_dev=$(stat -c %D "$mntpnt") \ -v root=$AUFS_ROOT_INO \ ' function pr() { if (inum) { print pid, inum; inum = ""; } } /^p/ { pr(); pid = substr($0, 2); do_print = 1; next; } /^R/ { ppid = substr($0, 2); if (ppid == g_ppid) do_print = 0; next; } /^D/ { dev = substr($0, 4); if (dev != g_dev) do_print = 0; next; } /^i/ && do_print { n = substr($0, 2); if (n != root) inum = inum " " n; } END { pr(); } ' | #tee /dev/tty | while read pid ino do # it may be very long and shell may not be able to handle as parameters echo "$ino" | xargs -r auibusy "$mntpnt" $bindex | { if [ $verbose -eq 1 ] then sed -e s/^/$pid' /' else test $(wc -c) -ne 0 && echo $pid fi } done | uniq auchk000077500000000000000000000062221315652647700121210ustar00rootroot00000000000000#!/bin/sh - # Copyright (C) 2005-2011 Junjiro R. Okajima # # This program, aufs is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version.
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA set -eu #set -x EEcho() # str { echo $0: $@ 1>&2 } f=/etc/default/aufs . $f Usage() { cat <<- EOF $0 [OPTION] writable_branch '[...]' -q | --quiet: -w | --whiteout: -r | --real: -s | --skip: EOF } Pass() # title { pass=$(($pass + 1)) test $opt_q -ne 1 && EEcho \[Pass $pass\] $@ true } Remove() # file { if [ -d "$1" ] then if [ $opt_r -eq 1 ] then rm -fvr "$1" || : else rm -ir "$1" || : fi else rm -v "$1" || : fi } opt_q=0 opt_w=0 opt_r=0 opt_s=0 for i do case $(getopt -o qwrs --long quiet,whiteout,real,skip -- $i | sed -e 's/^ *//' -e 's/ --$//' ) in -q|--quiet) opt_q=1;; -w|--whiteout) opt_w=1;; -r|--real) opt_r=1;; -s|--skip) opt_s=1;; --) ;; *) break;; esac shift done for i do test $opt_q -ne 1 && EEcho Checking "$i" for aufs cd "$i" case $(stat -f -c %T .) in aufs|UNKNOWN*${AUFS_SUPER_MAGIC_HEX}*) EEcho $i must not be aufs cd $OLDPWD continue ;; esac ######################################## pass=0 Pass Illegal whiteout find . -name '.wh.*' ! -name '.wh..wh.*' -printf '%h\0%f\0' | xargs -r0n2 | while read dir wh do #echo \""$dir"\" \""$wh"\" base=$(echo "$wh" | cut -c5-) test ! -e "$dir/$base" && continue ls -ld "$dir/$wh" "$dir/$base" if [ $opt_w -eq 1 ] then ans=w elif [ $opt_r -eq 1 ] then ans=r elif [ $opt_s -eq 1 ] then ans=s else read -p 'Which to remove [whiteout/real/skip]? 
' ans \ < /dev/tty > /dev/tty 2>&1 fi case "$ans" in [wW]*) Remove "$dir/$wh" || :;; [rR]*) Remove "$dir/$base" || :;; *) echo skipped;; esac done ######################################## Pass Remained pseudo-links did=0 for plink in ${AUFS_WH_PLINKDIR}/* do test ! -e "$plink" && break if [ -d "$plink" ] then EEcho illegal "$plink" continue fi did=1 #ls -l "$plink" || : find . -inum $(basename "$plink" | cut -f2 -d .) -ls || : done if [ $did -ne 0 ] then cat <<- EOF They will be maintained at remount or umount time, if you installed aufs helper scripts (See README in detail). If "$i" is not a writeble branch of CURRENTLY mounted aufs, you need to maintain them by yourself. EOF fi ######################################## Pass Remained temp files for tmp in ${AUFS_WH_ORPHDIR}/* do test ! -e "$tmp" && break if [ -d "$tmp" ] then EEcho illegal "$tmp" continue fi ls -l "$tmp" || : rm -i "$tmp" || : done # nothing to do for xinodir cd $OLDPWD done aufs.in.5000066400000000000000000002775501315652647700125470ustar00rootroot00000000000000.\".so aufs.tmac . .eo .de TQ .br .ns .TP \$1 .. .de Bu .IP \(bu 4 .. .ec .\" end of macro definitions . .\" ---------------------------------------------------------------------- .TH aufs 5 \*[AUFS_VERSION] Linux "Linux Aufs User's Manual" .SH NAME aufs \- advanced multi layered unification filesystem. version \*[AUFS_VERSION] .\" ---------------------------------------------------------------------- .SH DESCRIPTION Aufs is a stackable unification filesystem such as Unionfs, which unifies several directories and provides a merged single directory. In the early days, aufs was entirely re-designed and re-implemented Unionfs Version 1.x series. After many original ideas, approaches and improvements, it becomes totally different from Unionfs while keeping the basic features. See Unionfs Version 1.x series for the basic features. Recently, Unionfs Version 2.x series begin taking some of same approaches to aufs's. 
.\" ---------------------------------------------------------------------- .SH MOUNT OPTIONS At mount-time, the order of interpreting options is, .RS .Bu simple flags, except xino/noxino and udba=notify .Bu branches .Bu xino/noxino .Bu udba=notify .RE At remount-time, the options are interpreted in the given order, e.g. left to right. .RS .Bu create or remove whiteout\-base(\*[AUFS_WH_BASE]) and whplink\-dir(\*[AUFS_WH_PLINKDIR]) if necessary .RE . .TP .B br:BRANCH[:BRANCH ...] (dirs=BRANCH[:BRANCH ...]) Adds new branches. (cf. Branch Syntax). Aufs rejects the branch which is an ancestor or a descendant of another branch. It is called overlapped. When the branch is loopback-mounted directory, aufs also checks the source fs\-image file of loopback device. If the source file is a descendant of another branch, it will be rejected too. After mounting aufs or adding a branch, if you move a branch under another branch and make it descendant of another branch, aufs will not work correctly. By default (since linux\-3.2 until linux\-3.18\-rc1), aufs prohibits such operation internally, but there left a way to do. (cf. Branch Syntax). . .TP .B [ add | ins ]:index:BRANCH Adds a new branch. The index begins with 0. Aufs creates whiteout\-base(\*[AUFS_WH_BASE]) and whplink\-dir(\*[AUFS_WH_PLINKDIR]) if necessary. If there is the same named file on the lower branch (larger index), aufs will hide the lower file. You can only see the highest file. You will be confused if the added branch has whiteouts (including diropq), they may or may not hide the lower entries. .\" It is recommended to make sure that the added branch has no whiteout. (cf. DIAGNOSTICS). Even if a process have once mapped a file by mmap(2) with MAP_SHARED and the same named file exists on the lower branch, the process still refers the file on the lower(hidden) branch after adding the branch. 
If you want to update the contents of a process address space after adding, you need to restart your process or open/mmap the file again. .\" Usually, such files are executables or shared libraries. (cf. Branch Syntax). . .TP .B del:dir Removes a branch. Aufs does not remove whiteout\-base(\*[AUFS_WH_BASE]) and whplink\-dir(\*[AUFS_WH_PLINKDIR]) automatically. For example, when you add a RO branch which was unified as RW, you will see whiteout\-base or whplink\-dir on the added RO branch. If a process is referencing the file/directory on the deleting branch (by open, mmap, current working directory, etc.), aufs will return an error EBUSY. In this case, a script `aubusy' (in aufs\-util.git and aufs2\-util.git) is useful to identify which process (and which file) makes the branch busy. . .TP .B mod:BRANCH Modifies the permission flags of the branch. Aufs creates or removes whiteout\-base(\*[AUFS_WH_BASE]) and/or whplink\-dir(\*[AUFS_WH_PLINKDIR]) if necessary. If the branch permission is been changing `rw' to `ro', and a process is mapping a file by mmap(2) .\" with MAP_SHARED on the branch, the process may or may not be able to modify its mapped memory region after modifying branch permission flags. Additionally when you enable CONFIG_IMA (in linux\-2.6.30 and later), IMA may produce some wrong messages. But this is equivalent when the filesystem is changed `ro' in emergency. (cf. Branch Syntax). . .TP .B append:BRANCH equivalent to `add:(last index + 1):BRANCH'. (cf. Branch Syntax). . .TP .B prepend:BRANCH equivalent to `add:0:BRANCH.' (cf. Branch Syntax). . .TP .B xino=filename Use external inode number bitmap and translation table. When CONFIG_AUFS_EXPORT is enabled, external inode generation table too. It is set to /\*[AUFS_XINO_FNAME] by default, or \*[AUFS_XINO_DEFPATH]. Comma character in filename is not allowed. The files are created per an aufs and per a branch filesystem, and unlinked. 
So you cannot find this file, but it exists and is read/written frequently by aufs. When the specified file already exists, then mount(8) returns an error. (cf. External Inode Number Bitmap, Translation Table and Generation Table). If you enable CONFIG_SYSFS, the path of xino files is not shown in /proc/mounts (and /etc/mtab), instead it is shown in <sysfs>/fs/aufs/si_<id>/xi_path. Otherwise, it is shown in /proc/mounts unless it is the default path. . .TP .B noxino Stop using external inode number bitmap and translation table. If you use this option, some applications will not work correctly. .\" And pseudo link feature will not work after the inode cache is .\" shrunk. (cf. External Inode Number Bitmap, Translation Table and Generation Table). . .TP .B trunc_xib Truncate the external inode number bitmap file. The truncation is done automatically when you delete a branch unless you specify `notrunc_xib' option. (cf. External Inode Number Bitmap, Translation Table and Generation Table). . .TP .B notrunc_xib Stop truncating the external inode number bitmap file when you delete a branch. (cf. External Inode Number Bitmap, Translation Table and Generation Table). . .TP .B trunc_xino_path=BRANCH | itrunc_xino=INDEX Truncate the external inode number translation table per branch. The branch can be specified by path or index (its origin is 0). Sometimes the size of a xino file for tmpfs branch grows very big. If you don't like such situation, try "mount \-o remount,trunc_xino_path=BRANCH /your/aufs" (or itrunc_xino=INDEX). It will shrink the xino file for BRANCH. These options are one time actions. So the size may grow again. In order to make it work automatically when necessary, try trunc_xino option. These options are already implemented, but their design is not fixed (cf. External Inode Number Bitmap, Translation Table and Generation Table). . .TP .B trunc_xino | notrunc_xino Enable (or disable) the automatic truncation of xino files.
The truncation is done by discarding the internal "hole" (unused blocks). .\" When the number of blocks by the xino file for the branch exceeds .\" the predefined upper limit, the automatic truncation begins. If the xino .\" files contain few holes and the result size is still exceeds the upper .\" limit, then the upper limit is added by \*[AUFS_XINO_TRUNC_STEP] blocks. The .\" initial upper limit is \*[AUFS_XINO_TRUNC_INIT] blocks. .\" Currently the type of branch fs supported by this automatic truncation .\" is tmpfs or ramfs only. The default is notrunc_xino. These options are already implemented, but its design is not fixed (cf. External Inode Number Bitmap, Translation Table and Generation Table). TODO: customizable two values for upper limit \" . \" .TP \" .B trunc_xino_v=n:n . .TP .B acl .TQ .B noacl Enable or disable POSIX Access Control List. This feature is totally depending upon the branch fs. If your branch fs doesn't support POSIX ACL, these options are meaningless. CONFIG_FS_POSIX_ACL is required. . .TP .B create_policy | create=CREATE_POLICY .TQ .B copyup_policy | copyup | cpup=COPYUP_POLICY Policies to select one among multiple writable branches. The default values are `create=tdp' and `cpup=tdp'. link(2) and rename(2) systemcalls have an exception. In aufs, they try keeping their operations in the branch where the source exists. (cf. Policies to Select One among Multiple Writable Branches). . .TP .B dio Enable Direct I/O support (including Linux AIO), and always make open(2) with O_DIRECT success. But if your branch filesystem doesn't support it, then the succeeding I/O will fail (cf, Direct I/O). . .TP .B nodio Disable Direct I/O (including Linux AIO), and always make open(2) with O_DIRECT fail. This is default value (cf, Direct I/O). . .TP .B verbose | v Print some information. Currently, it is only busy file (or inode) at deleting a branch. . .TP .B noverbose | quiet | q | silent Disable `verbose' option. This is default value. . 
.TP .B sum df(1)/statfs(2) returns the total number of blocks and inodes of all branches. When the block sizes of all branches are not equal, aufs chooses the smallest one and calculates the number of blocks (including bavail and bfree). Note that there are cases that systemcalls may return ENOSPC, even if df(1)/statfs(2) shows that aufs has some free space/inode. . .TP .B nosum Disable `sum' option. This is default value. . .TP .B dirwh=N Watermark to remove a dir actually at rmdir(2) and rename(2). If the target dir which is being removed or renamed (destination dir) has a huge number of whiteouts, i.e. the dir is empty logically but not physically, the cost to remove/rename the single dir may be very high. It is required to unlink all of whiteouts internally before issuing rmdir/rename to the branch. To reduce the cost of single systemcall, aufs renames the target dir to a whiteout-ed temporary name and invokes a pre\-created kernel thread to remove whiteout-ed children and the target dir. The rmdir/rename systemcall returns just after kicking the thread. When the number of whiteout-ed children is less than the value of dirwh, aufs removes them in a single systemcall instead of passing another thread. This value is ignored when the branch is NFS. The default value is \*[AUFS_DIRWH_DEF]. .\" . .\" .TP .\" .B rdcache=N . .TP .B rdblk=N Specifies a size of internal VDIR block which is allocated at a time in bytes. The VDIR block will be allocated several times when necessary. If your directory has tens of thousands of files, you may want to expand this size. The default value is defined as \*[AUFS_RDBLK_DEF]. The size has to be larger than NAME_MAX (usually 255) and kmalloc\-able (the maximum limit depends on your system. at least 128KB is available for every system). If you set it to zero, then the internal estimation for the directory size becomes ON, and aufs sets the value for the directory individually.
Sometimes the estimated value may be inappropriate since the estimation is not so clever. Setting zero is useful when you use RDU (cf. VDIR/readdir(3) in user\-space (RDU)). Otherwise it may be a pressure for kernel memory space. Anytime you can reset the value to default by specifying rdblk=def. (cf. Virtual or Vertical Directory Block). . .TP .B rdhash=N Specifies a size of internal VDIR hash table which is used to compare the file names under the same named directory on multiple branches. The VDIR hash table will be allocated in readdir(3)/getdents(2), rmdir(2) and rename(2) for the existing target directory. If your directory has tens of thousands of files, you may want to expand this size. The default value is defined as \*[AUFS_RDHASH_DEF]. The size has to be larger than zero, and it will be multiplied by 4 or 8 (for 32\-bit and 64\-bit respectively, currently). The result must be kmalloc\-able (the maximum limit depends on your system. at least 128KB is available for every system). If you set it to zero, then the internal estimation for the directory becomes ON, and aufs sets the value for the directory individually. Sometimes the estimated value may be inappropriate since the estimation is not so clever. Setting zero is useful when you use RDU (cf. VDIR/readdir(3) in user\-space (RDU)). Otherwise it may be a pressure for kernel memory space. Anytime you can reset the value to default by specifying rdhash=def. (cf. Virtual or Vertical Directory Block). . .TP .B plink .TQ .B noplink Specifies to use `pseudo link' feature or not. The default is `plink' which means use this feature. (cf. Pseudo Link) . .TP .B clean_plink Removes all pseudo\-links in memory. In order to make pseudo\-link permanent, use `auplink' utility just before one of these operations, unmounting aufs, using `ro' or `noplink' mount option, deleting a branch from aufs, adding a branch into aufs, or changing your writable branch as readonly.
If you installed both of /sbin/mount.aufs and /sbin/umount.aufs, and your mount(8) and umount(8) support them, `auplink' utility will be executed automatically and flush pseudo\-links. (cf. Pseudo Link) . .TP .B udba=none | reval | notify Specifies the level of UDBA (User's Direct Branch Access) test. (cf. User's Direct Branch Access and Inotify Limitation). . .TP .B diropq=whiteouted | w | always | a Specifies whether mkdir(2) and rename(2) dir case make the created directory `opaque' or not. In other words, to create `\*[AUFS_WH_DIROPQ]' under the created or renamed directory, or not to create. When you specify diropq=w or diropq=whiteouted, aufs will not create it if the directory was not whiteout-ed or opaqued. If the directory was whiteout-ed or opaqued, the created or renamed directory will be opaque. When you specify diropq=a or diropq=always, aufs will always create it regardless of whether the directory was whiteout-ed/opaqued or not. The default value is diropq=w, it means not to create when it is unnecessary. .\" If you define CONFIG_AUFS_COMPAT at aufs compiling time, the default will be .\" diropq=a. .\" You need to consider this option if you are planning to add a branch later .\" since `diropq' affects the same named directory on the added branch. . .TP .B warn_perm .TQ .B nowarn_perm Adding a branch, aufs will issue a warning about uid/gid/permission of the adding branch directory, when they differ from the existing branch's. This difference may or may not impose a security risk. If you are sure that there is no problem and want to stop the warning, use `nowarn_perm' option. The default is `warn_perm' (cf. DIAGNOSTICS). . .TP .B shwh .TQ .B noshwh By default (noshwh), aufs doesn't show the whiteouts and they just hide the same named entries in the lower branches. The whiteout itself never appears either. If you enable CONFIG_AUFS_SHWH and specify `shwh' option, aufs will show you the name of whiteouts with keeping its feature to hide the lowers.
Honestly speaking, I am rather confused with this `visible whiteouts.' But a user who originally requested this feature wrote a nice how\-to document about this feature. See Tips file in the aufs CVS tree. . .TP .B dirperm1 .TQ .B nodirperm1 By default (nodirperm1), aufs respects the directory permission bits on all branches equally, which means if the permission bits for a directory on a lower readonly branch prohibit you from reading, then you cannot read even if you run "chmod a+rx" (and aufs copies it up). With this option (dirperm1), the behavior changes and aufs checks the permission bits of the directory on the topmost branch and the permission bits on all lower branches are ignored. In other words, you can read a directory even if the lower readonly branch fs prohibits it by its permission bits. This feature may invite a security risk similar to the world writable upper branch. As in that case, dirperm1 option will produce a warning too. . .TP .B dirren .TQ .B nodirren Activates or deactivates the special handling for renaming a directory (DIRREN) feature. In order to use this feature, CONFIG_AUFS_DIRREN has to be enabled and `dirren' mount option has to be specified too. By default (nodirren), aufs returns an error with EXDEV for the case of rename(2) a directory which exists on the multiple branches. Note that DIRREN is slow (I have not yet measured it though) since it loads and saves the list of the inode\-numbers per branch and the detailed information per branch. Note that `udba=notify' option may not work with DIRREN, since it is based upon the name, while DIRREN handles both of before\- and after\-renamed names. The internal name comparison may not work correctly. In this case, aufs behaves as if the default `udba=reval' were specified. .\" ---------------------------------------------------------------------- .SH Module Parameters .TP .B brs=1 | 0 Specifies to use the branch path data file under sysfs or not.
If the number of your branches is large or their path is long and you meet the limitation of mount(8) or /etc/mtab, you need to enable CONFIG_SYSFS and set aufs module parameter brs=1. When this parameter is set as 1, aufs does not show `br:' (or dirs=) mount option through /proc/mounts (and /etc/mtab). So you can keep yourself from the page limitation of mount(8) or /etc/mtab. Aufs shows branch paths through <sysfs>/fs/aufs/si_XXX/brNNN. Actually the file under sysfs also has a size limitation, but I don't think it is harmful. There is one more side effect in setting 1 to this parameter. If you rename your branch, the branch path written in /etc/mtab will be obsoleted and the future remount will meet some error due to the unmatched parameters (Remember that mount(8) may take the options from /etc/mtab and pass them to the systemcall). If you set 1, /etc/mtab will not hold the branch path and you will not meet such trouble. On the other hand, the entries for the branch path under sysfs are generated dynamically. So it must not be obsoleted. But I don't think users want to rename branches so often. If CONFIG_SYSFS is disabled, this parameter is always set to 0. . .TP .B allow_userns= Y | N Allows an unprivileged mount under user namespace. Userns mount to put AUFS into a chroot environment can be useful while it is a security worry. This parameter sets an internal flag FS_USERNS_MOUNT and allows userns unconditionally. See the discussion in http://www.mail\-archive.com/aufs\-users@lists.sourceforge.net/msg04266.html and its thread. The default is `N'. If CONFIG_USER_NS is disabled, this parameter is meaningless. . .TP .B sysrq=key Specifies MagicSysRq key for debugging aufs. You need to enable both of CONFIG_MAGIC_SYSRQ and CONFIG_AUFS_DEBUG. Currently this is for developers only. The default is `a'. . .TP .B debug= 0 | 1 Specifies disable(0) or enable(1) debug print in aufs. This parameter can be changed dynamically. You need to enable CONFIG_AUFS_DEBUG.
Currently this is for developers only. The default is `0' (disable). .\" ---------------------------------------------------------------------- .SH Entries under Sysfs and Debugfs See linux/Documentation/ABI/*/{sys,debug}fs\-aufs. .\" ---------------------------------------------------------------------- .SH Gain the performance in return for the features In order to gain a better performance, there are a few steps. They are essentially to drop the features from aufs, and to gain a performance in return for them. You don't have to drop all of them. It may be too much. Try step by step with measuring the performance you want using your typical workload. .SS Patch file . As my recommendation, there is one patch file in aufs[34]\-standalone.git tree, tmpfs\-idr.patch. It introduces IDR for the tmpfs inode\-number management, and has an effect to prevent the size of aufs's XINO/XIB files to grow rapidly. If you don't use TMPFS as your branch, the patch won't be necessary. .SS Configuration . Disable all unnecessary ones except CONFIG_AUFS_RDU (readdir in user\-space). RDU requires an external user\-space library libau.so, but it is so effective particularly for the directory which has tens of thousands of files. To use RDU, users have to set LD_PRELOAD environment variable. If he doesn't set, this configuration will do no harm. The size of aufs module will be larger a little, but the time\-performance (speed) won't be damaged. .SS Mount option . As a first step, I'd recommend you to try `dirperm1', `udba=none' and `nodirren.' The former prohibits aufs to dig down the lower branches in checking the directory permission bits, and the latter makes aufs not to watch the external modification, eg. by\-passing aufs (users' direct branch access). These options are able to be changed and restored anytime. For the second step, try `notrunc_xino' and `notrunc_xib.' It is not always when they are so effective. 
Especially if you have applied tmpfs\-idr.patch, then the effect is small since most of the effect is achieved by the patch. But there surely exists their effect. In this case, the size of XINO and XIB will grow only, not truncated. In other words, it is a time\-vs\-space issue. For the third and last step, try `noplink' and `noxino.' With these options, aufs behaves un\-POSIX\-ly a little, which means losing the features maintaining the hard\-link (pseudo\-link) and the inode numbers. Some behaviours may surprise users, but many internal processes will be skipped and the result performance will be better. For your convenience, mount.aufs(8) provides `\*[DROPLVL]=N' mount option. `N' means the level (see above) and you can specify either 1, 2 or 3 (and their negative values, which will be described soon). It is not a real mount option, which means it is not interpreted by kernel\-space. When this option is given, mount.aufs(8) translates it into several other (real) mount options, and passes them to kernel\-space as if they were originally specified. Currently there are 3 levels. .RS .nr step 1 1 .IP \n[step] 4 \*[DROPLVL1] .IP \n+[step] \*[DROPLVL2] .IP \n+[step] \*[DROPLVL3] .RE For example, when you give `\*[DROPLVL]=3', mount.aufs(8) converts it to `\*[DROPLVL1],\*[DROPLVL2],\*[DROPLVL3]'. For further convenience, mount.aufs(8) provides the negative values for each level. Note that there is no level 0, and no difference between 2 and \-2. The options in `\*[DROPLVL2]' are already implemented, but their design is not fixed (cf. External Inode Number Bitmap, Translation Table and Generation Table). And the current default value is `\*[DROPLVL2]', so technically speaking `\*[DROPLVL2]' is less important. .RS .nr step 1 1 .IP \-\n[step] 4 \*[DROPLVL1R] .IP \-\n+[step] \*[DROPLVL2R] .IP \-\n+[step] \*[DROPLVL3R] .RE The purpose of the negative values is to revert the effect of the positive values (counter\-level). Note the XINO path in `\-3'. 
In order to revert `noxino' in `\*[DROPLVL]=3', you need to specify the actual XINO path, but it is totally depending upon your environment, and mount.aufs(8) doesn't know about it and does nothing but provides the default path. So generally it will be necessary to append `xino=' to `\*[DROPLVL]=\-3'. Reverting `noatime' to `relatime' is rather tricky. It is due to the behaviours of mount(8) and mount(2). You need to run `remount,strictatime' before `remount,\*[DROPLVL]=\-1'. Also note that the order of the mount options. For example, if you want to drop some features but keep UDBA level as default, then you can specify `\*[DROPLVL]=1,udba=reval'. If you write the reverse order as `udba=reval,\*[DROPLVL]=1', then `udba=none' in `\*[DROPLVL]=1' takes its effect and the udba level specified before \*[DROPLVL] will lose. .\" ---------------------------------------------------------------------- .SH Git work\-tree and aufs Git has a cache called `index' file. In this cache there are the identity of the files individually. Here `identity' means a pair of struct stat.st_{dev,ino}. (Git may consider other stat members too. But the essential part of the identity is still dev and ino.) Since aufs is a virtual filesystem and manages the inode numbers, it provides its own st_dev and st_ino. They differ from the `index' cache in git, and some git operations have to refresh the `index' cache, which may take long time. For instance, .RS .Bu /branch/ro has 0x0801 for its st_dev .Bu /branch/ro/proj.git/fileA has 100 for its st_ino .Bu /branch/ro/proj.git/.git/index contains {0x0801,100} as fileA's identity .Bu mount /u as /branch/rw + /branch/ro, /u is aufs .Bu we can see the contents of /u/proj.git/.git/index is equivalent to /branch/ro/proj.git/.git/index .RE In this case, aufs provides {0x0802,110} (for example) for fileA's identity, which is different from /branch/ro/proj.git/fileA. If you run git\-diff or something, the behaviour of git differs a little. 
.RS .Bu git issues stat(2) and gets st_{dev,ino} pair. .Bu git compares the gotten pair and the one in the index file. .Bu when they are different from each other, git opens the file, reads all data, compares it with the cached data, and finds there is nothing changed. .Bu if the gotten pair is equal to the one in the index file, then open/read/compare steps will be skipped. .RE This issue can happen when you copy the git working tree to somewhere else. All files identity will be changed by the copy and the cached identity in index file will be obsoleted. Once you complete git\-status or something, the index file will be updated, and full open/read/compare steps will not happen anymore. This behaviour of git can be controlled by git's configuration core.checkstat. .\" ---------------------------------------------------------------------- .SH Branch Syntax .TP .B dir_path[ =permission [ + attribute ] ] .TQ .B permission := rw | ro | rr .TQ .B attribute := wh | nolwh | unpin | coo_reg | coo_all | moo | fhsm | icexsec | icexsys | icextr | icexusr | icexoth | icex dir_path is a directory path. The keyword after `dir_path=' is a permission flags for that branch. Comma, colon and the permission flags string (including `=') in the path are not allowed. Any (ordinary) filesystem can be a branch, But some are not accepted such like sysfs, procfs and unionfs. If you specify such filesystems as an aufs branch, aufs will return an error saying it is unsupported. Also aufs expects the writable branch filesystem supports the maximum filename length as NAME_MAX. The readonly branch filesystem can be shorter. Cramfs in linux stable release has strange inodes and it makes aufs confused. 
For example, .nf $ mkdir -p w/d1 w/d2 $ > w/z1 $ > w/z2 $ mkcramfs w cramfs $ sudo mount -t cramfs -o ro,loop cramfs /mnt $ find /mnt -ls 76 1 drwxr-xr-x 1 jro 232 64 Jan 1 1970 /mnt 1 1 drwxr-xr-x 1 jro 232 0 Jan 1 1970 /mnt/d1 1 1 drwxr-xr-x 1 jro 232 0 Jan 1 1970 /mnt/d2 1 1 -rw-r--r-- 1 jro 232 0 Jan 1 1970 /mnt/z1 1 1 -rw-r--r-- 1 jro 232 0 Jan 1 1970 /mnt/z2 .fi All these two directories and two files have the same inode with one as their link count. Aufs cannot handle such inode correctly. Currently, aufs involves a tiny workaround for such inodes. But some applications may not work correctly since aufs inode number for such inode will change silently. If you do not have any empty files, empty directories or special files, inodes on cramfs will be all fine. A branch should not be shared as the writable branch between multiple aufs. A readonly branch can be shared. The maximum number of branches is configurable at compile time (127 by default). When an unknown permission or attribute is given, aufs sets ro to that branch silently. .SS Permission . .TP .B rw Readable and writable branch. Set as default for the first branch. If the branch filesystem is mounted as readonly, you cannot set it `rw.' .\" A filesystem which does not support link(2) and i_op\->setattr(), for .\" example FAT, will not be used as the writable branch. . .TP .B ro Readonly branch and it has no whiteouts on it. Set as default for all branches except the first one. Aufs never issue both of write operation and lookup operation for whiteout to this branch. . .TP .B rr Real readonly branch, special case of `ro', for natively readonly branch. Assuming the branch is natively readonly, aufs can optimize some internal operation. For example, if you specify `udba=notify' option, aufs does not set fsnotify or inotify for the things on rr branch. Set by default for a branch whose fs\-type is either `iso9660', `cramfs' or `romfs' (and `squashfs' for linux\-2.6.29 and later). 
When your branch exists on slower device and you have some capacity on your hdd, you may want to try ulobdev tool in ULOOP sample. It can cache the contents of the real devices on another faster device, so you will be able to get the better access performance. The ulobdev tool is for a generic block device, and the ulohttp is for a filesystem image on http server. If you want to spin down your hdd to save the battery life or something, then you may want to use ulobdev to save the access to the hdd, too. See $AufsCVS/sample/uloop in detail. .SS Attribute . .TP .B wh Readonly branch and it has/might have whiteouts on it. Aufs never issue write operation to this branch, but lookup for whiteout. Use this as `=ro+wh'. . .TP .B nolwh Usually, aufs creates a whiteout as a hardlink on a writable branch. This attributes prohibits aufs to create the hardlinked whiteout, including the source file of all hardlinked whiteout (\*[AUFS_WH_BASE].) If you do not like a hardlink, or your writable branch does not support link(2), then use this attribute. But I am afraid a filesystem which does not support link(2) natively will fail in other place such as copy\-up. Use this as `=rw+nolwh'. Also you may want to try `noplink' mount option, while it is not recommended. . .TP .B unpin By default, aufs sets `pin' to the branch dir, which means that users cannot remove nor rename the branch top dir as if it were a mount\-point. In some cases and some users may need to rename the branch top dir. So this attribute is implemented. If you specify `unpin' as a branch attribute, it stops behaving as a mount\-point and you can rename the branch top dir. Needless to say, if you remove the branch top dir, then aufs cannot work. Since linux\-3.18\-rc1, this attribute became meaningless. It is simply ignored and all branch top dir behaves as this attribute is always specified. . .TP .B coo_reg | coo_all Copy\-up on open. By default the internal copy\-up is executed when it is really necessary. 
It is not done when a file is opened for writing, but when any writing is done. These attributes are for not only the readonly branches but also the writable branches. `coo_reg' handles the regular files only and `coo_all' handles the regular files plus the directories. All special files and symlinks will not be copied\-up. Additionally NFS server may not issue open(2) when NFS client issues open(2). This behavior means that the file may not be copied\-up when NFS client issues open(2). The internal copy\-up operation by these attributes are unrelated to the COPYUP_POLICY (cf. Policies to Select One among Multiple Writable Branches), which means `copy\-up on open' always choose the nearest upper writable branch. Even if there are multiple writable branches set these attributes, the internal copy\-up operation is done once, not recursively. Users who have many (over 100) branches want to know and analyze when and what file is copied\-up. To insert a new upper branch which contains such files only may improve the performance of aufs. The `copy\-up on open' itself may not be so attractive, but combining with a feature FHSM (File\-based Hierarchy Storage Management) will be useful. . .TP .B moo Move\-up on open. Very similar attribute to coo except moo unlinks the copy\-up source after the successful operation. This attribute handles the regular files only, and obviously cannot be specified to the readonly branch. Users can specify all these attributes for a single writable branch, but only the last specified one has its effect. Other coo/moo attributes are silently ignored. The `move\-up on open' itself may not be so attractive, but combining with a feature FHSM (File\-based Hierarchy Storage Management) will be useful. . .TP .B fhsm File\-based Hierarchy Storage Management. Specifies that this branch is a participant of aufs FHSM. Refer to .B aufs_fhsm(5) in detail. . 
.TP .B icexsec | icexsys | icextr | icexusr | icexoth | icex Ignore the error on copying\-up/down XATTR. When an internal copy\-up/down happens, aufs tries copying all XATTRs. Here an error can happen because the XATTR support on the dst branch may differ from the src branch. If you know how the branch supports or unsupports XATTR, you can specify these attributes. `icexsec' means to ignore an error on copying\-up/down XATTR categorized as "security" (for LSM and capability). And `icexsys,' `icextr,' and `icexusr,' are for "system" (for posix ACL), "trusted" and "user" categories individually. `icexoth' is for any other category. To be convenient, `icex' sets them all. See also linux/Documentation/filesystems/aufs/design/06xattr.txt. These attributes are essentially for the writable branches. But when you use .B aufs_fhsm(5), you may want to specify them to the readonly branches too. So they are available for the readonly branches. .\" .SS FUSE as a branch .\" A FUSE branch needs special attention. .\" The struct fuse_operations has a statfs operation. It is OK, but the .\" parameter is struct statvfs* instead of struct statfs*. So almost .\" all user\-space implementation will call statvfs(3)/fstatvfs(3) instead of .\" statfs(2)/fstatfs(2). .\" In glibc, [f]statvfs(3) issues [f]statfs(2), open(2)/read(2) for .\" /proc/mounts, .\" and stat(2) for the mountpoint. With this situation, a FUSE branch will .\" cause a deadlock in creating something in aufs. Here is a sample .\" scenario, .\" .\" .RS .\" .\" .IN -10 .\" .Bu .\" create/modify a file just under the aufs root dir. .\" .Bu .\" aufs acquires a write\-lock for the parent directory, ie. the root dir. .\" .Bu .\" A library function or fuse internal may call statfs for a fuse branch. .\" The create=mfs mode in aufs will surely call statfs for each writable .\" branches. .\" .Bu .\" FUSE in kernel\-space converts and redirects the statfs request to the .\" user\-space. 
.\" .Bu .\" the user\-space statfs handler will call [f]statvfs(3). .\" .Bu .\" the [f]statvfs(3) in glibc will access /proc/mounts and issue .\" stat(2) for the mountpoint. But those require a read\-lock for the aufs .\" root directory. .\" .Bu .\" Then a deadlock occurs. .\" .\" .RE 1 .\" .\" .IN .\" .\" In order to avoid this deadlock, I would suggest not to call .\" [f]statvfs(3) from fuse. Here is a sample code to do this. .\" .nf .\" struct statvfs stvfs; .\" .\" main() .\" { .\" statvfs(..., &stvfs) .\" or .\" fstatvfs(..., &stvfs) .\" stvfs.f_fsid = 0 .\" } .\" .\" statfs_handler(const char *path, struct statvfs *arg) .\" { .\" struct statfs stfs .\" .\" memcpy(arg, &stvfs, sizeof(stvfs)) .\" .\" statfs(..., &stfs) .\" or .\" fstatfs(..., &stfs) .\" .\" arg->f_bfree = stfs.f_bfree .\" arg->f_bavail = stfs.f_bavail .\" arg->f_ffree = stfs.f_ffree .\" arg->f_favail = /* any value */ .\" } .\" .fi .\" ---------------------------------------------------------------------- .SH External Inode Number Bitmap, Translation Table and Generation Table (xino) Aufs uses one external bitmap file and one external inode number translation table files per an aufs and per a branch filesystem by default. Additionally when CONFIG_AUFS_EXPORT is enabled, one external inode generation table is added. The bitmap (and the generation table) is for recycling aufs inode number and the others are a table for converting an inode number on a branch to an aufs inode number. The default path is `first writable branch'/\*[AUFS_XINO_FNAME]. If there is no writable branch, the default path will be \*[AUFS_XINO_DEFPATH]. .\" A user who executes mount(8) needs the privilege to create xino .\" file. If you enable CONFIG_SYSFS, the path of xino files are not shown in /proc/mounts (and /etc/mtab), instead it is shown in /fs/aufs/si_/xi_path. Otherwise, it is shown in /proc/mounts unless it is not the default path. Those files are always opened and read/write by aufs frequently. 
If your writable branch is on flash memory device, it is recommended to put xino files on other than flash memory by specifying `xino=' mount option. The maximum file size of the bitmap is, basically, the amount of the number of all the files on all branches divided by 8 (the number of bits in a byte). For example, on a 4KB page size system, if you have 32,768 (or 2,599,968) files in aufs world, then the maximum file size of the bitmap is 4KB (or 320KB). The maximum file size of the table will be `max inode number on the branch x size of an inode number'. For example in 32bit environment, .nf $ df -i /branch_fs /dev/hda14 2599968 203127 2396841 8% /branch_fs .fi and /branch_fs is a branch of the aufs. When the inode number is assigned contiguously (without `hole'), the maximum xino file size for /branch_fs will be 2,599,968 x 4 bytes = about 10 MB. But it might not be allocated all of disk blocks. When the inode number is assigned discontinuously, the maximum size of xino file will be the largest inode number on a branch x 4 bytes. Additionally, the file size is limited to LLONG_MAX or the s_maxbytes in filesystem's superblock (s_maxbytes may be smaller than LLONG_MAX). So the support\-able largest inode number on a branch is less than 2305843009213693950 (LLONG_MAX/4\-1). This is the current limitation of aufs. On 64bit environment, this limitation becomes more strict and the supported largest inode number is less than LLONG_MAX/8\-1. In order to estimate the size of the table for your readonly branch fs, try .nf $ echo $((4 * $(sudo find /branch_fs -xdev -printf "%i\\n" | sort -n | tail -n 1))) .fi For 64bit environment, replace 4 by 8 in above equation. The xino files are always hidden, i.e. removed. So you cannot do `ls \-l xino_file'. If you enable CONFIG_DEBUG_FS, you can check this information through <debugfs>/aufs/<si_id>/{xib,xi[0\-9]*,xigen}. xib is for the bitmap file, xi0 is for the first branch, and xi1 is for the next. xigen is for the generation table. 
xib and xigen are in the format of, .nf <blocks>x<block size> <file size> .fi Note that a filesystem usually has a feature called pre\-allocation, which means a number of blocks are allocated automatically, and then deallocated silently when the filesystem thinks they are unnecessary. You do not have to be surprised the sudden changes of the number of blocks, when your filesystem which xino files are placed supports the pre\-allocation feature. The rests are hidden xino file information in the format of, .nf <file count>, <blocks>x<block size> <file size> .fi If the file count is larger than 1, it means some of your branches are on the same filesystem and the xino file is shared by them. Note that the file size may not be equal to the actual consuming blocks since xino file is a sparse file, i.e. a hole in a file which does not consume any disk blocks. Once you unmount aufs, the xino files for that aufs are totally gone. It means that the inode number is not permanent across umount or shutdown. The xino files should be created on the filesystem except NFS. If your first writable branch is NFS, you will need to specify xino file path other than NFS. Also if you are going to remove the branch where xino files exist or change the branch permission to readonly, you need to use xino option before del/mod the branch. The bitmap file and the table can be truncated. For example, if you delete a branch which has huge number of files, many inode numbers will be recycled and the bitmap will be truncated to smaller size. Aufs does this automatically when a branch is deleted. You can truncate it anytime you like if you specify `trunc_xib' mount option. But when the accessed inode number was not deleted, nothing will be truncated. The truncation is essentially equivalent to .nf $ cp --sparse=always <current xino file> <new xino file> && rm <current xino file> .fi It means that you have two xino files during the copy, and you should pay attention to the free space of the filesystem where the xino file is located. 
If the free space is not large enough to hold two xino files temporary during the copy, then the truncation fails and the xino file will go on growing. For such case, you should move the xino file to another larger partition, and move it back to where it was (if you want). To do this, use `xino=' mount option. During this move, the xino file is truncated automatically. If you do not want to truncate it (it may be slow) when you delete a branch, specify `notrunc_xib' after `del' mount option. For the table, see trunc_xino_path=BRANCH, itrunc_xino=INDEX, trunc_xino and notrunc_xino option. If you do not want to use xino, use noxino mount option. Use this option with care, since the inode number may be changed silently and unexpectedly anytime. For example, rmdir failure, recursive chmod/chown/etc to a large and deep directory or anything else. And some applications will not work correctly. .\" When the inode number has been changed, your system .\" can be crazy. If you want to change the xino default path, use xino mount option. After you add branches, the persistence of inode number may not be guaranteed. At remount time, cached but unused inodes are discarded. And the newly appeared inode may have different inode number at the next access time. The inodes in use have the persistent inode number. When aufs assigned an inode number to a file, and if you create the same named file on the upper branch directly, then the next time you access the file, aufs may assign another inode number to the file even if you use xino option. Some applications may treat the file whose inode number has been changed as totally different file. .\" ---------------------------------------------------------------------- .SH Pseudo Link (hardlink over branches) Aufs supports `pseudo link' which is a logical hard\-link over branches (cf. ln(1) and link(2)). In other words, a copied\-up file by link(2) and a copied\-up file which was hard\-linked on a readonly branch filesystem. 
When you have files named fileA and fileB which are hardlinked on a readonly branch, if you write something into fileA, aufs copies\-up fileA to a writable branch, and write(2) the originally requested thing to the copied\-up fileA. On the writable branch, fileA is not hardlinked. But aufs remembers it was hardlinked, and handles fileB as if it existed on the writable branch, by referencing fileA's inode on the writable branch as fileB's inode. Once you unmount aufs, the plink info for that aufs kept in memory is totally gone. It means that the pseudo\-link is not permanent. If you want to make plink permanent, try `auplink' utility just before one of these operations, unmounting your aufs, using `ro' or `noplink' mount option, deleting a branch from aufs, adding a branch into aufs, or changing your writable branch to readonly. This utility will reproduce all real hardlinks on a writable branch by linking them, and removes pseudo\-link info in memory and temporary link on the writable branch. Since this utility accesses your branches directly, you cannot hide them by `mount \-\-bind /tmp /branch' or something. If you are willing to rebuild your aufs with the same branches later, you should use auplink utility before you umount your aufs. If you installed both of /sbin/mount.aufs and /sbin/umount.aufs, and your mount(8) and umount(8) support them, `auplink' utility will be executed automatically and flush pseudo\-links. While this utility is running, it puts aufs into the pseudo\-link maintenance mode. In this mode, only the process which began the maintenance mode (and its child processes) is allowed to operate in aufs. Some other processes which are not related to the pseudo\-link will be allowed to run too, but the rest have to return an error or wait until the maintenance mode ends. If a process already acquires an inode mutex (in VFS), it has to return an error. 
Due to the fact that the pseudo\-link maintenance mode is operated via procfs, the pseudo\-link feature itself (including the related mount options) depends upon CONFIG_PROC_FS too. .nf # auplink /your/aufs/root flush # umount /your/aufs/root or # auplink /your/aufs/root flush # mount -o remount,mod:/your/writable/branch=ro /your/aufs/root or # auplink /your/aufs/root flush # mount -o remount,noplink /your/aufs/root or # auplink /your/aufs/root flush # mount -o remount,del:/your/aufs/branch /your/aufs/root or # auplink /your/aufs/root flush # mount -o remount,append:/your/aufs/branch /your/aufs/root .fi The plinks are kept both in memory and on disk. When they consume too many resources on your system, you can use the `auplink' utility at anytime and throw away the unnecessary pseudo\-links safely. Additionally, the `auplink' utility is very useful for some security reasons. For example, when you have a directory whose permission flags are 0700, and a file which is 0644 under the 0700 directory. Usually, all files under the 0700 directory are private and no one else can see the file. But when the directory is 0711 and someone else knows the 0644 filename, he can read the file. Basically, aufs pseudo\-link feature creates a temporary link under the directory whose owner is root and the permission flags are 0700. But when the writable branch is NFS, aufs sets 0711 to the directory. When the 0644 file is pseudo\-linked, the temporary link, of course the contents of the file is totally equivalent, will be created under the 0711 directory. The filename will be generated by its inode number. While it is hard to know the generated filename, someone else may try peeping the temporary pseudo\-linked file by his software tool which may try the name from one to MAX_INT or something. In this case, the 0644 file will be read unexpectedly. I am afraid that leaving the temporary pseudo\-links can be a security hole. 
It makes sense to execute `auplink /your/aufs/root flush' periodically, when your writable branch is NFS. When your writable branch is not NFS, or all users are careful enough to set 0600 to their private files, you do not have to worry about this issue. If you do not want this feature, use `noplink' mount option. .SS The behaviors of plink and noplink This sample shows that the `f_src_linked2' with `noplink' option cannot follow the link. .nf none on /dev/shm/u type aufs (rw,xino=/dev/shm/rw/.aufs.xino,br:/dev/shm/rw=rw:/dev/shm/ro=ro) $ ls -li ../r?/f_src_linked* ./f_src_linked* ./copied ls: ./copied: No such file or directory 15 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ../ro/f_src_linked 15 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ../ro/f_src_linked2 22 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ./f_src_linked 22 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ./f_src_linked2 $ echo FOO >> f_src_linked $ cp f_src_linked copied $ ls -li ../r?/f_src_linked* ./f_src_linked* ./copied 15 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ../ro/f_src_linked 15 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ../ro/f_src_linked2 36 -rw-r--r-- 2 jro jro 6 Dec 22 11:03 ../rw/f_src_linked 53 -rw-r--r-- 1 jro jro 6 Dec 22 11:03 ./copied 22 -rw-r--r-- 2 jro jro 6 Dec 22 11:03 ./f_src_linked 22 -rw-r--r-- 2 jro jro 6 Dec 22 11:03 ./f_src_linked2 $ cmp copied f_src_linked2 $ none on /dev/shm/u type aufs (rw,xino=/dev/shm/rw/.aufs.xino,noplink,br:/dev/shm/rw=rw:/dev/shm/ro=ro) $ ls -li ../r?/f_src_linked* ./f_src_linked* ./copied ls: ./copied: No such file or directory 17 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ../ro/f_src_linked 17 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ../ro/f_src_linked2 23 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ./f_src_linked 23 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ./f_src_linked2 $ echo FOO >> f_src_linked $ cp f_src_linked copied $ ls -li ../r?/f_src_linked* ./f_src_linked* ./copied 17 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ../ro/f_src_linked 17 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ../ro/f_src_linked2 36 -rw-r--r-- 1 jro jro 
6 Dec 22 11:03 ../rw/f_src_linked 53 -rw-r--r-- 1 jro jro 6 Dec 22 11:03 ./copied 23 -rw-r--r-- 2 jro jro 6 Dec 22 11:03 ./f_src_linked 23 -rw-r--r-- 2 jro jro 6 Dec 22 11:03 ./f_src_linked2 $ cmp copied f_src_linked2 cmp: EOF on f_src_linked2 $ .fi .\" .\" If you add/del a branch, or link/unlink the pseudo-linked .\" file on a branch .\" directly, aufs cannot keep the correct link count, but the status of .\" `pseudo-linked.' .\" Those files may or may not keep the file data after you unlink the .\" file on the branch directly, especially the case of your branch is .\" NFS. If you add a branch which has fileA or fileB, aufs does not follow the pseudo link. The file on the added branch has no relation to the same named file(s) on the lower branch(es). If you use noxino mount option, pseudo link will not work after the kernel shrinks the inode cache. This feature will not work for squashfs before version 3.2 since its inode is tricky. When the inode is hardlinked, squashfs inodes has the same inode number and correct link count, but the inode memory object is different. Squashfs inodes (before v3.2) are generated for each, even they are hardlinked. .\" ---------------------------------------------------------------------- .SH User's Direct Branch Access (UDBA) UDBA means a modification to a branch filesystem manually or directly, e.g. bypassing aufs. While aufs is designed and implemented to be safe after UDBA, it can make yourself and your aufs confused. And some information like aufs inode will be incorrect. For example, if you rename a file on a branch directly, the file on aufs may or may not be accessible through both of old and new name. Because aufs caches various information about the files on branches. And the cache still remains after UDBA. Aufs has a mount option named `udba' which specifies the test level at access time whether UDBA was happened or not. . 
.TP .B udba=none Aufs trusts the dentry and the inode cache on the system, and never test about UDBA. With this option, aufs runs fastest, but it may show you incorrect data. Additionally, if you often modify a branch directly, aufs will not be able to trace the changes of inodes on the branch. It can be a cause of wrong behavior, deadlock or anything else. It is recommended to use this option only when you are sure that nobody access a file on a branch. It might be difficult for you to achieve real `no UDBA' world when you cannot stop your users doing `find / \-ls' or something. If you really want to forbid all of your users to UDBA, here is a trick for it. With this trick, users cannot see the branches directly and aufs runs with no problem, except `auplink' utility. But if you are not familiar with aufs, this trick may make yourself confused. .nf # d=/tmp/.aufs.hide # mkdir $d # for i in $branches_you_want_to_hide > do > mount -n --bind $d $i > done .fi When you unmount the aufs, delete/modify the branch by remount, or you want to show the hidden branches again, unmount the bound /tmp/.aufs.hide. .nf # umount -n $branches_you_want_to_unbound .fi If you use FUSE filesystem as an aufs branch which supports hardlink, you should not set this option, since FUSE makes inode objects for each hardlinks (at least in linux\-2.6.23). When your FUSE filesystem maintains them at link/unlinking, it is equivalent to `direct branch access' for aufs. . .TP .B udba=reval Aufs tests only the existence of the file which existed. If the existed file was removed on the branch directly, aufs discard the cache about the file and re-lookup it. So the data will be updated. This test is at minimum level to keep the performance and ensure the existence of a file. This is default and aufs runs still fast. This rule leads to some unexpected situation, but I hope it is harmless. Those are totally depends upon cache. Here are just a few examples. . 
.RS .Bu If the file is cached as negative or not\-existed, aufs does not test it. And the file is still handled as negative after a user created the file on a branch directly. If the file is not cached, aufs will lookup normally and find the file. . .Bu When the file is cached as positive or existed, and a user created the same named file directly on the upper branch. Aufs detects the cached inode of the file is still existing and will show you the old (cached) file which is on the lower branch. . .Bu When the file is cached as positive or existed, and a user renamed the file by rename(2) directly. Aufs detects the inode of the file is still existing. You may or may not see both of the old and new files. TODO: If aufs also tests the name, we can detect this case. .RE If your outer modification (UDBA) is rare and you can ignore the temporary and minor differences between virtual aufs world and real branch filesystem, then try this mount option. . .TP .B udba=notify Aufs sets either `fsnotify' or `inotify' to all the accessed directories on its branches and receives the event about the dir and its children. It consumes resources, cpu and memory. And I am afraid that the performance will be hurt, but it is most strict test level. There are some limitations of linux inotify, see also Inotify Limitation. So it is recommended to leave udba default option usually, and set it to notify by remount when you need it. When a user accesses the file which was notified UDBA before, the cached data about the file will be discarded and aufs re-lookup it. So the data will be updated. When an error condition occurs between UDBA and aufs operation, aufs will return an error, including EIO. To use this option, you need to enable CONFIG_INOTIFY and CONFIG_AUFS_HINOTIFY. In linux\-2.6.31, CONFIG_FSNOTIFY was introduced and CONFIG_INOTIFY was listed in Documentation/feature\-removal\-schedule.txt. 
In aufs2\-31 and later (until CONFIG_INOTIFY is removed actually), you can choose either `fsnotify' or `inotify' in configuration. Whichever you choose, specify `udba=notify', and aufs interprets it as an abstract name. To rename/rmdir a directory on a branch directory may reveal the same named directory on the lower branch. Aufs tries re-looking up the renamed directory and the revealed directory and assigning different inode number to them. But the inode number including their children can be a problem. The inode numbers will be changed silently, and aufs may produce a warning. If you rename a directory repeatedly and reveal/hide the lower directory, then aufs may confuse their inode numbers too. It depends upon the system cache. When you make a directory in aufs and mount other filesystem on it, the directory in aufs cannot be removed expectedly because it is a mount point. But the same named directory on the writable branch can be removed, if someone wants. It is just an empty directory, instead of a mount point. Aufs cannot stop such direct rmdir, but produces a warning about it. If the pseudo\-linked file is hardlinked or unlinked on the branch directly, its inode link count in aufs may be incorrect. It is recommended to flush the pseudo\-links by auplink script. In linux\-4.2 (and later, probably), for the exported aufs, NFS doesn't show the changes at once and returns ESTALE even if you set udba=notify. It is a natural behaviour of linux NFS's and aufs can do nothing about it. Probably simple "sleep 1" will help. .\" ---------------------------------------------------------------------- .SH Linux Inotify Limitation Unfortunately, current inotify (linux\-2.6.18) has some limitations, and aufs must derive it. \" .SS IN_ATTRIB, updating atime \" When a file/dir on a branch is accessed directly, the inode atime (access \" time, cf. stat(2)) may or may not be updated. In some cases, inotify \" does not fire this event. So the aufs inode atime may remain old. 
\" .SS IN_ATTRIB, updating nlink \" When the link count of a file on a branch is incremented by link(2) \" directly, \" inotify fires IN_CREATE to the parent \" directory, but IN_ATTRIB to the file. So the aufs inode nlink may \" remain old. .SS IN_DELETE, removing file on NFS When a file on a NFS branch is deleted directly, inotify may or may not fire IN_DELETE event. It depends upon the status of dentry (DCACHE_NFSFS_RENAMED flag). In this case, the file on aufs seems still exists. Aufs and any user can see the file. Since linux\-3.15\-rc1, this behavior has been changed and NFS fires the event from itself. .SS IN_IGNORED, deleted rename target When a file/dir on a branch is unlinked by rename(2) directly, inotify fires IN_IGNORED which means the inode is deleted. Actually, in some cases, the inode survives. For example, the rename target is linked or opened. In this case, inotify watch set by aufs is removed by VFS and inotify. And aufs cannot receive the events anymore. So aufs may show you incorrect data about the file/dir. .\" ---------------------------------------------------------------------- .SH Virtual or Vertical Directory Block (VDIR) In order to provide the merged view of file listing, aufs builds internal directory block on memory. For readdir, aufs performs readdir() internally for each dir on branches, merges their entries with eliminating the whiteout\-ed ones, and sets it to the opened file (dir) object. So the file object has its entry list until it is closed. The entry list will be updated when the file position is zero (by rewinddir(3)) and becomes obsoleted. The merged result is cached in the corresponding inode object and maintained by a customizable life-time option. Note: the mount option `rdcache=' is still under considering and its description is hidden from this manual. 
Some people may say it can be a security hole or invite a DoS attack since the opened and once readdir\-ed dir (file object) holds its entry list and becomes a pressure for system memory. But I would say it is similar to files under /proc or /sys. The virtual files in them also hold a memory page (generally) while they are opened. When an idea to reduce memory for them is introduced, it will be applied to aufs too. The dynamically allocated memory block for the name of entries has a unit of \*[AUFS_RDBLK_DEF] bytes by default. During building dir blocks, aufs creates a hash list (hashed and divided by \*[AUFS_RDHASH_DEF] by default) and judges whether the entry is whiteout-ed by its upper branch or already listed. These values are suitable for normal environments. But you may have tens of thousands of files or very long filenames under a single directory. For such cases, you may need to customize these values by specifying rdblk= and rdhash= aufs mount options. For instance, there are 97 files under my /bin, and the total name length is 597 bytes. .nf $ \\ls -1 /bin | wc 97 97 597 .fi Strictly speaking, 97 end\-of\-line codes are included. But it is OK since aufs VDIR also stores the name length in 1 byte. In this case, you do not need to customize the default values. 597 bytes filenames will be stored in 2 VDIR memory blocks (597 < \*[AUFS_RDBLK_DEF] x 2). And 97 filenames are distributed among \*[AUFS_RDHASH_DEF] lists, so one list will point to 4 names on average. To judge whether a name is whiteout-ed or not, the number of comparisons will be 4. 2 memory allocations and 4 comparisons cost little (even if the directory is opened for a long time). So you do not need to customize. If your directory has tens of thousands of files, then you will need to specify rdblk= and rdhash=.
.nf $ ls -U /mnt/rotating-rust | wc -l 1382438 .fi In this case, assuming the average length of filenames is 6, in order to get better time performance I would recommend to set $((128*1024)) or $((64*1024)) for rdblk, and $((8*1024)) or $((4*1024)) for rdhash. You can change these values of the active aufs mount by "mount \-o remount". This customization is not for reducing the memory space, but for reducing time for the number of memory allocation and the name comparison. The larger value is faster, in general. Of course, you will need system memory. This is a generic "time\-vs\-space" problem. .\" ---------------------------------------------------------------------- .SH Using libau.so There is a dynamic shared object library called libau.so in aufs\-util or aufs2\-util GIT tree. This library provides several useful functions which wrap the standard library functions such as, .RS .Bu readdir, readdir_r, closedir .Bu pathconf, fpathconf .RE To use libau.so, .RS .Bu install by "make install_ulib" under aufs\-util (or aufs2\-util) GIT tree .Bu set the environment variable "LD_PRELOAD=libau.so", or configure /etc/ld.so.preload .Bu set the environment variable "\*[LibAuEnv]=all" .Bu and run your application. .RE If you use pathconf(3)/fpathconf(3) with _PC_LINK_MAX for aufs, you need to use libau.so. .SS VDIR/readdir(3) in user\-space (RDU) If you have a directory which has tens of thousands of files, aufs VDIR consumes much memory. So the program which reads a huge directory may produce an "out of memory" or "page allocation failure" message in the syslog, due to the memory fragmentation or real starvation. In this case, RDU (readdir(3) in user\-space) may help you. Because the kernel memory space cannot be swappable and consuming much can be pure memory pressure, while it is not true in user\-space. If you enable CONFIG_AUFS_RDU at compiling aufs, install libau.so, and set some environment variables, then you can use RDU. Just simply run your application. 
The dynamic link library libau.so implements another readdir routine, and all readdir(3) calls in your application will be handled by libau.so. For setting environment variables, you may want to use a shell function or alias like this. .nf $ auls() > { > LD_PRELOAD=/your/path/to/libau.so > \*[LibAuEnv]=all > #AUFS_RDU_BLK= set if you want > ls $@ > } $ alias auls="LD_PRELOAD=/your/path/to/libau.so \*[LibAuEnv]=all ls" .fi When you call readdir(3), the dynamic linker calls readdir in libau.so. If it finds the passed dir is NOT aufs, it calls the usual readdir(3). If the dir is aufs, then libau.so gets all filenames under the dir by aufs specific ioctl(2)s, instead of regular readdir(3), and merges them by itself. In other words, libau.so moves the memory consumption in kernel\-space to user\-space. While it is good to stop consuming much memory in kernel\-space, sometimes the speed performance may be damaged a little as a side effect. It is just a little, I hope. At the same time, I won't be surprised if readdir(3) runs faster. It is recommended to specify rdblk=0 when you use this library. If your directory is not so huge and you don't meet the out of memory situation, probably you don't need this library. The original VDIR in kernel\-space is still alive, and you can live without libau.so. .SS pathconf(_PC_LINK_MAX) Since some implementations of pathconf(3) (and fpathconf(3)) for _PC_LINK_MAX decide the target filesystem type and return the pre\-defined constant value, when aufs is unknown to the library, it will return the default value (127). Actually the maximum number of the link count in aufs inherits the topmost writable branch filesystem's. But the standard pathconf(3) will not return the correct value. To support such case, libau.so provides a wrapper for pathconf(3) (and fpathconf(3)). When the parameter is _PC_LINK_MAX, the wrapper checks whether the given parameter refers to aufs or not.
If it is aufs, then it will get the maximum link count from the topmost writable branch internally. Otherwise, it behaves as normal pathconf(3) transparently. .SS Note Since this is a dynamically linked library, it is unavailable if your application is statically linked. And ld.so(8) ignores LD_PRELOAD when the application is setuid/setgid\-ed unless the library is also setuid/setgid\-ed. It is a generic rule of dynamically linked libraries. Additionally the functions in libau.so are unavailable in these cases too. .RS .Bu the application or library issues getdents(2) instead of readdir(3). .Bu the library which calls readdir(3) internally. e.g. scandir(3). .Bu the library which calls pathconf(3) internally. .RE .\" ---------------------------------------------------------------------- .SH Copy On Write, or aufs internal copyup and copydown Every stackable filesystem which implements copy\-on\-write supports the copyup feature. The feature is to copy a file/dir from the lower branch to the upper internally. When you have one readonly branch and one upper writable branch, and you append a string to a file which exists on the readonly branch, then aufs will copy the file from the readonly branch to the writable branch with its directory hierarchy. It means one write(2) involves several logical/internal mkdir(2), creat(2), read(2), write(2) and close(2) systemcalls before the actual expected write(2) is performed. Sometimes it may take a long time, particularly when the file is very large. If CONFIG_AUFS_DEBUG is enabled, aufs produces a message saying `copying a large file.' You may see the message when you change the xino file path or truncate the xino/xib files. Sometimes those files can be large and may take a long time to handle. \" .SS a regular file in HFSPLUS \" HFSPLUS acquires an inode mutex lock at closing a file. This behavior \" is not a problem, but aufs doesn't expect such behavior and it had \" caused a deadlock.
So aufs added a special handling to copy\-up a \" regular file in HFSPLUS, eg. opens the file internally twice. It means \" there exists an additional overhead in copying a regular file in HFSPLUS. .\" ---------------------------------------------------------------------- .SH Policies to Select One among Multiple Writable Branches Aufs has some policies to select one among multiple writable branches when you are going to write/modify something. There are two kinds of policies, one is for newly creating something and the other is for internal copy\-up. You can select them by specifying mount option `create=CREATE_POLICY' or `cpup=COPYUP_POLICY.' These policies have no meaning when you have only one writable branch. If there is some meaning, it must hurt the performance. .SS Exceptions for Policies In every case below, even if the policy says that the branch where a new file should be created is /rw2, the file will be created on /rw1. . .Bu If there is a readonly branch with `wh' attribute above the policy\-selected branch and the parent dir is marked as opaque, or the target (creating) file is whiteout-ed on the ro+wh branch, then the policy will be ignored and the target file will be created on the nearest upper writable branch than the ro+wh branch. .RS .nf /aufs = /rw1 + /ro+wh/diropq + /rw2 /aufs = /rw1 + /ro+wh/wh.tgt + /rw2 .fi .RE . .Bu If there is a writable branch above the policy\-selected branch and the parent dir is marked as opaque or the target file is whiteout-ed on the branch, then the policy will be ignored and the target file will be created on the highest one among the upper writable branches which has diropq or whiteout. In case of whiteout, aufs removes it as usual. .RS .nf /aufs = /rw1/diropq + /rw2 /aufs = /rw1/wh.tgt + /rw2 .fi .RE . .Bu link(2) and rename(2) systemcalls are exceptions in every policy. They try selecting the branch where the source exists if possible, since copying up a large file will take a long time. If it can't be, ie.
the branch where the source exists is readonly, then they will follow the copyup policy. . .Bu There is an exception for rename(2) when the target exists. If the rename target exists, aufs compares the index of the branches where the source and the target are existing and selects the higher one. If the selected branch is readonly, then aufs follows the copyup policy. .SS Policies for Creating . .TP .B create=tdp | top\-down\-parent Select the highest branch where the parent dir exists. If this branch is not writable, internal copyup will happen. The policy for this copyup is always `bottom\-up.' This is the default policy. . .TP .B create=tdmfs:low[:second] Select the highest writable branch regardless the existence of the parent dir. If the free space of this branch is less than `low' bytes, then the next highest writable branch will be selected. If the free space of all writable branches are less than `low' bytes, then create=mfs policy is applied. For the duration (`second') parameter, see create=mfs[:second] below. FHSM (File\-based Hierarchy Storage Management) may bring you the very similar result, and is more flexible than this policy. . .TP .B create=rr | round\-robin Selects a writable branch in round robin. When you have two writable branches and creates 10 new files, 5 files will be created for each branch. mkdir(2) systemcall is an exception. When you create 10 new directories, all are created on the same branch. . .TP .B create=mfs[:second] | most\-free\-space[:second] Selects a writable branch which has most free space. In order to keep the performance, you can specify the duration (`second') which makes aufs hold the index of last selected writable branch until the specified seconds expires. The seconds is up to \*[AUFS_MFS_MAX_SEC] seconds. The first time you create something in aufs after the specified seconds expired, aufs checks the amount of free space of all writable branches by internal statfs call and the held branch index will be updated. 
The default value is \*[AUFS_MFS_DEF_SEC] seconds. . .TP .B create=mfsrr:low[:second] Selects a writable branch in most\-free\-space mode first, and then round\-robin mode. If the selected branch has less free space than the specified value `low' in bytes, then aufs re-tries in round\-robin mode. .\" `G', `M' and `K' (case insensitive) can be followed after `low.' Or Try an arithmetic expansion of shell which is defined by POSIX. For example, $((10 * 1024 * 1024)) for 10M. You can also specify the duration (`second') which is equivalent to the `mfs' mode. . .TP .B create=pmfs[:second] Selects a writable branch where the parent dir exists, such as tdp mode. When the parent dir exists on multiple writable branches, aufs selects the one which has most free space, such as mfs mode. . .TP .B create=pmfsrr:low[:second] Firstly selects a writable branch as the `pmfs mode.' If there are less than `low' bytes available on all branches where the parent dir exists, aufs selects the one which has the most free space regardless the parent dir. .SS Policies for Copy\-Up . .TP .B cpup=tdp | top\-down\-parent Equivalent to the same named policy for create. This is the default policy. . .TP .B cpup=bup | bottom\-up\-parent Selects the writable branch where the parent dir exists and the branch is nearest upper one from the copyup\-source. . .TP .B cpup=bu | bottom\-up Selects the nearest upper writable branch from the copyup\-source, regardless the existence of the parent dir. .\" ---------------------------------------------------------------------- .SH Exporting Aufs via NFS Aufs is supporting NFS\-exporting. Since aufs has no actual block device, you need to add NFS `fsid' option at exporting. Refer to the manual of NFS about the detail of this option. There are some limitations or requirements. .RS .Bu The branch filesystem must support NFS\-exporting. .Bu NFSv2 is not supported. 
When you mount the exported aufs from your NFS client, you will need to specify some NFS options like v3 or nfsvers=3, especially if it is nfsroot. .Bu If the size of the NFS file handle on your branch filesystem is large, aufs will not be able to handle it. The maximum size of NFSv3 file handle for a filesystem is 64 bytes. Aufs uses 24 bytes for 32bit system, plus 12 bytes for 64bit system. The rest is room for a file handle of a branch filesystem. .Bu The External Inode Number Bitmap, Translation Table and Generation Table (xino) is required since NFS file handle is based upon inode number. The mount option `xino' is enabled by default. The external inode generation table and its debugfs entry (/aufs/si_*/xigen) are created when CONFIG_AUFS_EXPORT is enabled even if you don't export aufs actually. The size of the external inode generation table only grows and is never truncated. You might need to pay attention to the free space of the filesystem where xino files are placed. By default, it is the first writable branch. .Bu The branch filesystems must be accessible, which means `not hidden.' It means you need to `mount \-\-move' when you use initramfs and switch_root(8), or chroot(8). .Bu Since aufs has several filename prefixes reserved, the maximum filename length is shorter than ordinary 255. Actually \*[AUFS_MAX_NAMELEN] (defined as ${AUFS_MAX_NAMELEN}). This value should be specified as `namlen=' when you mount NFS. The name of the branch top directory has another limit. When you set the module parameter `brs' to 1 (default), then you can see the branch pathname via /sys/fs/aufs/si_XXX/brNNN. Here it is printed with its branch attributes such as `=rw' or `=ro+wh'. Since all the sysfs entries have the size limit of 4096 bytes, the length of the branch path becomes shorter than 4096.
Actually you can specify any branch with much longer names, but you will meet some troubles when you remount later because remounting runs the aufs mount helper internally and it tries reading /sys/fs/aufs/si_XXX/brNNN. .RE .\" ---------------------------------------------------------------------- .SH Direct I/O The Direct I/O (including Linux AIO) is a filesystem (and its backend block device) specific feature. And there is a minor problem around the aufs internal copyup. Assume you have two branches, lower RO ext2 and upper RW tmpfs. As you know ext2 supports Direct I/O, but tmpfs doesn't. When a `fileA' exists in the lower ext2, and you write something into it after opening it with O_DIRECT, then aufs behaves like this if the mount option `dio' is specified. .RS .Bu The application issues open(O_DIRECT); Aufs opens the file in the lower ext2 and succeeds. .Bu The application issues write("something"); Aufs copies\-up the file from the lower ext2 to the upper tmpfs, and re-opens the file in tmpfs with O_DIRECT. It fails and returns an error. .RE This behavior may be a problem since an application expects the error to be returned from the first open(2) instead of the later write(2), when the filesystem doesn't support Direct I/O. (But, in real world, I don't think there is an application which doesn't check the error from write(2). So it won't be a big problem actually). If the file exists in the upper tmpfs, the first open(2) will fail expectedly. So there is no problem in this case. But the problem may happen when the internal copyup happens and the behavior of the branch differs from each other. As long as the feature depends upon the filesystem, this problem will not be solved. So aufs sets `nodio' by default, which means all Direct I/O are disabled, and open(2) with O_DIRECT always fails. If you want to use Direct I/O AND all your writable branches support it, then specify `dio' option to make it in effect.
With the similar reason, fcntl(F_SETFL, O_DIRECT) will not work for aufs file descriptor. .\" ---------------------------------------------------------------------- .SH Possible problem of the inode number in TMPFS Although it is rare to happen, TMPFS has a problem about its inode number management. Actually TMPFS does not maintain the inode number at all. Linux kernel has a global 32bit number for general use of inode number, and TMPFS uses it while most of (real) filesystem maintains its inode number by itself. The global number can wrap around regardless the inode number is still in use. This MAY cause a problem. For instance, when /your/tmpfs/fileA has 10 as its inode number, the same value (10) may be assigned to a newly created file /your/tmpfs/fileB. Some applications do not care the duplicated inode numbers, but others, including AUFS, will be really confused by this situation. If your writable branch FS is TMPFS and the inode number wraps around, aufs will not work correctly. It is recommended to use one of FS on HDD, ramdisk+ext2 or tmpfs+FSimage+loopback mount, as your writable branch FS. Or apply a patch in aufs4\-standalone.git. It addresses this tmpfs-inum-assignment problem by modifying the source file other than aufs. .\" ---------------------------------------------------------------------- .SH Dentry and Inode Caches If you want to clear caches on your system, there are several tricks for that. If your system ram is low, try `find /large/dir \-ls > /dev/null'. It will read many inodes and dentries and cache them. Then old caches will be discarded. But when you have large ram or you do not have such large directory, it is not effective. If you want to discard cache within a certain filesystem, try `mount \-o remount /your/mntpnt'. Some filesystem may return an error of EINVAL or something, but VFS discards the unused dentry/inode caches on the specified filesystem. 
.\" ---------------------------------------------------------------------- .SH Compatible/Incompatible with Unionfs Version 1.x Series .\" If you compile aufs with \-DCONFIG_AUFS_COMPAT, dirs= option and =nfsro .\" branch permission flag are available. They are interpreted as .\" br: option and =ro flags respectively. .\" `debug', `delete', `imap' options are ignored silently. When you .\" compile aufs without \-DCONFIG_AUFS_COMPAT, these three options are .\" also ignored, but a warning message is issued. Ignoring `delete' option, and to keep filesystem consistency, aufs tries writing something to only one branch in a single systemcall. It means aufs may copyup even if the copyup\-src branch is specified as writable. For example, you have two writable branches and a large regular file on the lower writable branch. When you issue rename(2) to the file on aufs, aufs may copyup it to the upper writable branch. If this behavior is not what you want, then you should rename(2) it on the lower branch directly. And there is a simple shell script `unionctl' under sample subdirectory, which is compatible with unionctl(8) in Unionfs Version 1.x series, except \-\-query action. This script executes mount(8) with `remount' option and uses add/del/mod aufs mount options. If you are familiar with Unionfs Version 1.x series and want to use unionctl(8), you can try this script instead of using mount \-o remount,... directly. Aufs does not support ioctl(2) interface. This script is highly depending upon mount(8) in util\-linux\-2.12p package, and you need to mount /proc to use this script. If your mount(8) version differs, you can try modifying this script. It is very easy. The unionctl script is just for a sample usage of aufs remount interface. Aufs uses the external inode number bitmap and translation table by default. The default branch permission for the first branch is `rw', and the rest is `ro.' The whiteout is for hiding files on lower branches. 
Also it is applied to stop readdir going to lower branches. The latter case is called `opaque directory.' Any whiteout is an empty file, it means whiteout is just a mark. In the case of hiding lower files, the name of whiteout is `\*[AUFS_WH_PFX]<filename>.' And in the case of stopping readdir, the name is `\*[AUFS_WH_PFX]\*[AUFS_WH_PFX].opq'. .\" or .\" `\*[AUFS_WH_PFX]__dir_opaque.' The name depends upon your compile .\" configuration .\" CONFIG_AUFS_COMPAT. .\" All of newly created or renamed directory will be opaque. All whiteouts are hardlinked, including `/\*[AUFS_WH_BASE].' The hardlink on an ordinary (disk based) filesystem does not consume inode resource newly. But in linux tmpfs, the number of free inodes will be decremented by link(2). It is recommended to specify nr_inodes option to your tmpfs if you meet ENOSPC. Use this option after checking by `df \-i.' When you rmdir or rename\-to the dir which has a number of whiteouts, aufs renames the dir to the temporary whiteout-ed name like `\*[AUFS_WH_PFX]\*[AUFS_WH_PFX].<\*[AUFS_WH_TMP_LEN]\-digits hex>.' Then aufs removes it after the actual operation. cf. mount option `dirwh.' .\" ---------------------------------------------------------------------- .SH Incompatible with an Ordinary Filesystem stat(2) returns the inode info from the first existing inode among the branches, except the directory link count. Aufs computes the directory link count larger than the exact value usually, in order to keep UNIX filesystem semantics, or in order to shut find(1) mouth up. The size of a directory may be wrong too, but it should do no harm. The timestamp of a directory will not be updated when a file is created or removed under it, and it was done on a lower branch. The test for permission bits has two cases. One is for a directory, and the other is for a non\-directory. In the case of a directory, aufs checks the permission bits of all existing directories.
It means you need the correct privilege for the directories including the lower branches. The test for a non\-directory is more simple. It checks only the topmost inode. statfs(2) returns the information of the first branch info except namelen when `nosum' is specified (the default). The namelen is decreased by the whiteout prefix length. Although the whiteout prefix is essentially `\*[AUFS_WH_PFX]', to support rmdir(2) and rename(2) (when the target directory already existed), the namelen is decreased more since the name will be renamed to `\*[AUFS_WH_PFX]\*[AUFS_WH_PFX].<\*[AUFS_WH_TMP_LEN]\-digits hex>' as previously described. And the block size may differ from st_blksize which is obtained by stat(2). The whiteout prefix (\*[AUFS_WH_PFX]) is reserved on all branches. Users should not handle the filename begins with this prefix. In order to future whiteout, the maximum filename length is limited by the longest value \- \*[AUFS_WH_PFX_LEN] * 2 \- 1 \- \*[AUFS_WH_TMP_LEN] = \*[AUFS_MAX_NAMELEN]. It means you cannot handle such long name in aufs, even if it surely exists on the underlying branch fs. The readdir(3)/getdents(2) call show you such name, but the d_type is set to DT_UNKNOWN. It may be a violation of POSIX. Remember, seekdir(3) and telldir(3) are not defined in POSIX. They may not work as you expect. Try rewinddir(3) or re-open the dir. If you dislike the difference between the aufs entries in /etc/mtab and /proc/mounts, and if you are using mount(8) in util\-linux package, then try ./mount.aufs utility. Copy the script to /sbin/mount.aufs. This simple utility tries updating /etc/mtab. If you do not care about /etc/mtab, you can ignore this utility. Remember this utility is highly depending upon mount(8) in util\-linux\-2.12p package, and you need to mount /proc. Since aufs uses its own inode and dentry, your system may cache huge number of inodes and dentries. It can be as twice as all of the files in your union. 
It means that unmounting or remounting readonly at shutdown time may take a long time, since mount(2) in VFS tries freeing all of the cache on the target filesystem. When you open a directory, aufs will open several directories internally. It means you may reach the limit of the number of file descriptor. And when the lower directory cannot be opened, aufs will close all the opened upper directories and return an error. The sub\-mount under the branch of local filesystem is ignored. For example, if you have mount another filesystem on /branch/another/mntpnt, the files under `mntpnt' will be ignored by aufs. It is recommended to mount the sub\-mount under the mounted aufs. For example, .nf # sudo mount /dev/sdaXX /ro_branch # d=another/mntpnt # sudo mount /dev/sdbXX /ro_branch/$d # mkdir -p /rw_branch/$d # sudo mount -t aufs -o br:/rw_branch:/ro_branch none /aufs # sudo mount -t aufs -o br:/rw_branch/${d}:/ro_branch/${d} none /aufs/another/$d .fi There are several characters which are not allowed to use in a branch directory path and xino filename. See detail in Branch Syntax and Mount Option. The file\-lock which means fcntl(2) with F_SETLK, F_SETLKW or F_GETLK, flock(2) and lockf(3), is applied to virtual aufs file only, not to the file on a branch. It means you can break the lock by accessing a branch directly. TODO: check `security' to hook locks, as inotify does. Aufs respects all "security" hooks in kernel, so you can configure LSM for both of virtual aufs files and real branch\-fs files. But there is one exception, it is the kernel function "security_mmap_file()." The function called inside aufs for a branch\-fs file may cause a deadlock, so aufs stops calling it. LSM settings for the virtual aufs files works as usual. The I/O to the named pipe or local socket are not handled by aufs, even if it exists in aufs. 
After the reader and the writer established their connection if the pipe/socket are copied\-up, they keep using the old one instead of the copied\-up one. The fsync(2) and fdatasync(2) systemcalls return 0 which means success, even if the given file descriptor is not opened for writing. I am afraid this behavior may violate some standards. Checking the behavior of fsync(2) on ext2, aufs decided to return success. If you want to use disk quota, you should set it up to your writable branch since aufs does not have its own block device. When your aufs is the root directory of your system, and your system tells you some of the filesystem were not unmounted cleanly, try these procedure when you shutdown your system. .nf # mount -no remount,ro / # for i in $writable_branches # do mount -no remount,ro $i # done .fi If your xino file is on a hard drive, you also need to specify `noxino' option or `xino=/your/tmpfs/xino' at remounting root directory. To rename(2) directory may return EXDEV even if both of src and tgt are on the same aufs, when `dirren' is not specified. When the rename\-src dir exists on multiple branches and the lower dir has child/children, aufs has to copyup all his children. It can be recursive copyup. Current aufs does not support such huge copyup operation at one time in kernel space, instead produces a warning and returns EXDEV. Generally, mv(1) detects this error and tries mkdir(2) and rename(2) or copy/unlink recursively. So the result is harmless. If your application which issues rename(2) for a directory does not support EXDEV, it will not work on aufs. Also this specification is applied to the case when the src directory exists on the lower readonly branch and it has child/children. While it is rare, users can open a removed file with a little help from procfs. 
.RS .Bu open a file and get its descriptor .Bu remove the file .Bu generate a string `/proc/PID/fd/N' .Bu open the same file using the generated string .Bu .RE This operation is a little difficult for aufs since aufs allows the direct access to branches (by\-passing aufs), and it is hard to distinguish the case of this. .RS .Bu remove a file on a branch directly (by\-passing aufs) .Bu open the file via aufs .RE For the latter case, aufs detects the unmatching status between aufs cached info and the real info from the branch, and tries refreshing by re-lookup. Finally aufs finds the file is removed and let open(2) return an error. For the former case, currently (linux\-3.13\-rc7), aufs simply follows the behavior of ext2 which supports for opening a non\-directory but returns an error for a directory. Other than open(2), users may chmod(2) and chown(2) similarly (remove the file and then operate it via procfs). Ext2 supports them too, but aufs doesn't. I don't think it a big disadvantage since users can fchmod(2) and fchown(2) instead. If a sudden accident such like a power failure happens during aufs is performing, and regular fsck for branch filesystems is completed after the disaster, you need to extra fsck for aufs writable branches. It is necessary to check whether the whiteout remains incorrectly or not, eg. the real filename and the whiteout for it under the same parent directory. If such whiteout remains, aufs cannot handle the file correctly. To check the consistency from the aufs' point of view, you can use a simple shell script called /sbin/auchk. Its purpose is a fsck tool for aufs, and it checks the illegal whiteout, the remained pseudo\-links and the remained aufs\-temp files. If they are found, the utility reports you and asks whether to delete or not. It is recommended to execute /sbin/auchk for every writable branch filesystem before mounting aufs if the system experienced crash. In linux\-v4.5, copy_file_range(2) is introduced and aufs supports it. 
The systemcall supports only when the given two files exist on the same filesystem. In aufs world, two files must exist on the same physical filesystem, not on the logical aufs. The case of two files existing on the logically same aufs but physically different file system is not supported. For example, fileA and fileB are given, and fileA exists on the lower readonly branch in aufs, and fileB exists on the upper writable branch. When these two branches exist on the same filesystem, then aufs copy_file_range(2) should work. Otherwise it will return an error. In other words, aufs copy_file_range(2) doesn't incur the internal copyup since such behaviour doesn't fit the original purpose of copy_file_range(2). .\" ---------------------------------------------------------------------- .SH EXAMPLES The mount options are interpreted from left to right at remount-time. These examples shows how the options are handled. (assuming /sbin/mount.aufs was installed) .nf # mount -v -t aufs br:/day0:/base none /u none on /u type aufs (rw,xino=/day0/.aufs.xino,br:/day0=rw:/base=ro) # mount -v -o remount,\\ prepend:/day1,\\ xino=/day1/xino,\\ mod:/day0=ro,\\ del:/day0 \\ /u none on /u type aufs (rw,xino=/day1/xino,br:/day1=rw:/base=ro) .fi .nf # mount -t aufs br:/rw none /u # mount -o remount,append:/ro /u different uid/gid/permission, /ro # mount -o remount,del:/ro /u # mount -o remount,nowarn_perm,append:/ro /u # (there is no warning) .fi .\" If you want to expand your filesystem size, aufs may help you by .\" adding an writable branch. Since aufs supports multiple writable .\" branches, the old writable branch can be being writable, if you want. .\" In this example, any modifications to the files under /ro branch will .\" be copied-up to /new, but modifications to the files under /rw branch .\" will not. .\" And the next example shows the modifications to the files under /rw branch .\" will be copied-up to /new/a. .\" .\" TODO: test multiple writable branches policy. 
cpup=nearest, cpup=exist_parent. .\" .\" .nf .\" # mount -v -t aufs br:/rw:/ro none /u .\" none on /u type aufs (rw,xino=/rw/.aufs.xino,br:/rw=rw:/ro=ro) .\" # mkfs /new .\" # mount -v -o remount,add:1:/new=rw /u .\" none on /u type aufs (rw,xino=/rw/.aufs.xino,br:/rw=rw:/new=rw:/ro=ro) .\" .fi .\" .\" .nf .\" # mount -v -t aufs br:/rw:/ro none /u .\" none on /u type aufs (rw,xino=/rw/.aufs.xino,br:/rw=rw:/ro=ro) .\" # mkfs /new .\" # mkdir /new/a new/b .\" # mount -v -o remount,add:1:/new/b=rw,prepend:/new/a,mod:/rw=ro /u .\" none on /u type aufs (rw,xino=/rw/.aufs.xino,br:/new/a=rw:/rw=ro:/new/b=rw:/ro=ro) .\" .fi When you use aufs as root filesystem, it is recommended to consider to exclude some directories. For example, /tmp and /var/log are not need to stack in many cases. They do not usually need to copyup or to whiteout. Also the swapfile on aufs (a regular file, not a block device) is not supported. In order to exclude the specific dir from aufs, try bind mounting. And there is a good sample which is for network booted diskless machines. See sample/ in detail. .\" ---------------------------------------------------------------------- .SH DIAGNOSTICS When you add a branch to your union, aufs may warn you about the privilege or security of the branch, which is the permission bits, owner and group of the top directory of the branch. For example, when your upper writable branch has a world writable top directory, a malicious user can create any files on the writable branch directly, like copyup and modify manually. I am afraid it can be a security issue. When you mount or remount your union without \-o ro common mount option and without writable branch, aufs will warn you that the first branch should be writable. .\" It is discouraged to set both of `udba' and `noxino' mount options. In .\" this case the inode number under aufs will always be changed and may .\" reach the end of inode number which is a maximum of unsigned long. 
If .\" the inode number reaches the end, aufs will return EIO repeatedly. When you set udba other than notify and change something on your branch filesystem directly, later aufs may detect some mismatches to its cache. If it is a critical mismatch, aufs returns EIO. When an error occurs in aufs, aufs prints the kernel message with `errno.' The priority of the message (log level) is ERR or WARNING which depends upon the message itself. You can convert the `errno' into the error message by perror(3), strerror(3) or something. For example, the `errno' in the message `I/O Error, write failed (\-28)' is 28 which means ENOSPC or `No space left on device.' When CONFIG_AUFS_BR_RAMFS is enabled, you can specify ramfs as an aufs branch. Since ramfs is simple, it does not set the maximum link count originally. In aufs, it is very dangerous, particularly for whiteouts. Finally aufs sets the maximum link count for ramfs. The value is 32000 which is borrowed from ext2. After you prepend a branch which already has some entries, aufs may report an I/O Error with "brabra should be negative" or something. For instance, you are going to open(2) a regular file in aufs and write(2) something to it. If you prepend a branch between open(2) and write(2), and the added branch already has a same named entry other than a regular file, then you get a conflict. .RS .Bu a regular file FOO exists in aufs. .Bu open the file FOO. .Bu add a branch which has FOO but it is a directory, and change the permission of the old branch to RO. .Bu write to the file FOO. .Bu aufs tries copying\-up FOO to the upper writable branch which was recently added. .Bu aufs finds a directory FOO on the upper branch, and returns an error. .RE In this situation, aufs keeps returning an error during FOO is cached in memory because it remembers that FOO is a regular file instead of a directory. When the system discards the cache about FOO, then you will see the directory FOO. 
In other words, you will not be able to see the directory FOO on the newly added branch during the file FOO on the lower branch is in use. This situation may invite more complicated issue. If you unlink(2) the opened file FOO, then aufs will create a whiteout on the upper writable branch. And you get another conflict which is coexisting a whiteout and a real entry on the same branch. In this case, aufs also keeps returning an error when you try using FOO. .\" .SH Current Limitation . .\" ---------------------------------------------------------------------- .\" SYNOPSIS .\" briefly describes the command or function's interface. For commands, this .\" shows the syntax of the command and its arguments (including options); bold- .\" face is used for as-is text and italics are used to indicate replaceable .\" arguments. Brackets ([]) surround optional arguments, vertical bars (|) .\" separate choices, and ellipses (...) can be repeated. For functions, it shows .\" any required data declarations or #include directives, followed by the .\" function declaration. . .\" DESCRIPTION .\" gives an explanation of what the command, function, or format does. Discuss .\" how it interacts with files and standard input, and what it produces on .\" standard output or standard error. Omit internals and implementation .\" details unless they're critical for understanding the interface. Describe .\" the usual case; for information on options use the OPTIONS section. If .\" there is some kind of input grammar or complex set of subcommands, consider .\" describing them in a separate USAGE section (and just place an overview in .\" the DESCRIPTION section). . .\" RETURN VALUE .\" gives a list of the values the library routine will return to the caller and .\" the conditions that cause these values to be returned. . .\" EXIT STATUS .\" lists the possible exit status values or a program and the conditions that .\" cause these values to be returned. . 
.\" USAGE .\" describes the grammar of any sublanguage this implements. . .\" FILES .\" lists the files the program or function uses, such as configuration files, .\" startup files, and files the program directly operates on. Give the full .\" pathname of these files, and use the installation process to modify the .\" directory part to match user preferences. For many programs, the default .\" installation location is in /usr/local, so your base manual page should use .\" /usr/local as the base. . .\" ENVIRONMENT .\" lists all environment variables that affect your program or function and how .\" they affect it. . .\" SECURITY .\" discusses security issues and implications. Warn about configurations or .\" environments that should be avoided, commands that may have security .\" implications, and so on, especially if they aren't obvious. Discussing security .\" in a separate section isn't necessary; if it's easier to understand, place .\" security information in the other sections (such as the DESCRIPTION or USAGE .\" section). However, please include security information somewhere! . .\" CONFORMING TO .\" describes any standards or conventions this implements. . .\" NOTES .\" provides miscellaneous notes. . .\" BUGS .\" lists limitations, known defects or inconveniences, and other questionable .\" activities. .SH COPYRIGHT Copyright \(co 2005\-2017 Junjiro R. Okajima .SH AUTHOR Junjiro R. Okajima .\" SEE ALSO .\" lists related man pages in alphabetical order, possibly followed by other .\" related pages or documents. Conventionally this is the last section. aufs.shlib000066400000000000000000000036231315652647700130630ustar00rootroot00000000000000# # Copyright (C) 2005-2011 Junjiro R. Okajima # # This program, aufs is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # library functions for aufs shell scripts # path in canonical representation # note: bash builtin "pwd -P" modies $PWD unexpectedly SetDir() # var dir { cd "$2" eval "$1=\"$(pwd -P)\"" cd "$OLDPWD" } # escape the unprintable characters, mainly for grep-ping /proc/mounts Esc() # [-e] { sed -r -e ' s/\\/\\134/g s/$/\\012/ ' | tr -d '\n' | sed -r -e ' s/ /\\040/g s/\t/\\011/g s/\r/\\015/g s/\\012$// ' | { test $# -eq 1 && test "$1" = "-e" && sed -r -e 's/\\/\\\\/g' || cat; } echo } # find a mount-entry by its mount-point FindMntEnt() # mntpnt { proc_mounts=/proc/self/mounts test ! -e $proc_mounts && proc_mounts=/proc/$$/mounts test ! -e $proc_mounts && proc_mounts=/proc/mounts fgrep \ $(echo "$1" | Esc)\ aufs\ $proc_mounts | tail -n 1 } # current mount options MntOpts() # mntpnt { FindMntEnt "$1" | cut -f4 -d' ' } ######################################## AuDebug() # 1 | 0 [sec] { test $1 -eq 0 && set +x aufs_debug=/sys/module/aufs/parameters/debug if [ -f $aufs_debug ] then echo $1 | sudo dd of=$aufs_debug 2> /dev/null test $# -eq 2 && sleep $2 fi test $1 -eq 1 && set -x true } # Local variables: ; # mode: text; # End: ; auibusy.c000066400000000000000000000042061315652647700127250ustar00rootroot00000000000000/* * Copyright (C) 2011-2015 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include static void usage(char *me) { fprintf(stderr, "usage: %s mntpnt bindex [inum ...]\n", me); } static int do_ibusy(char *inum, int fd, struct aufs_ibusy *ibusy) { int err; err = -1; errno = 0; ibusy->ino = strtoul(inum, NULL, 0); if (errno) goto out; err = 0; if (ibusy->ino == AUFS_ROOT_INO) goto out; err = ioctl(fd, AUFS_CTL_IBUSY, ibusy); if (!err && ibusy->h_ino) printf("i%llu\tb%d\thi%llu\n", (unsigned long long)ibusy->ino, ibusy->bindex, (unsigned long long)ibusy->h_ino); out: return err; } int main(int argc, char *argv[]) { int err, fd, i; struct aufs_ibusy ibusy; char a[16], *eprefix; DIR *dp; err = -1; errno = EINVAL; eprefix = argv[0]; if (argc < 3) { usage(argv[0]); goto out; } eprefix = argv[1]; dp = opendir(argv[1]); if (!dp) goto out; fd = dirfd(dp); eprefix = argv[2]; errno = 0; ibusy.bindex = strtoul(argv[2], NULL, 0); if (errno) goto out; if (argc > 3) { for (i = 3; i < argc; i++) { eprefix = argv[i]; err = do_ibusy(argv[i], fd, &ibusy); if (err) break; } } else { eprefix = a; while (fgets(a, sizeof(a), stdin)) { err = do_ibusy(a, fd, &ibusy); if (err) break; } } out: if (err) perror(eprefix); return err; } aumvdown.8000066400000000000000000000061021315652647700130260ustar00rootroot00000000000000.\".so aufs.tmac . .eo .de TQ .br .ns .TP \$1 .. .de Bu .IP \(bu 4 .. .ec .\" end of macro definitions . 
.\" ---------------------------------------------------------------------- .TH AUMVDOWN 8 "aufs3.9 and later" Linux "Linux Aufs User's Manual" .SH NAME aumvdown \- moves-down a file between aufs branches .\" ---------------------------------------------------------------------- .SH SYNOPSIS .SY aumvdown .OP options .RI file_in_aufs .IR .\|.\|. .YS .\" ---------------------------------------------------------------------- .SH DESCRIPTION "aumvdown" command finds the first branch where the given file exists, copies it to the next lower writable branch, and then removes the file on the first branch. There are several conditions to achieve the operation. .RS .Bu It is obvious that when the file exists between the first and the next lower writable branches, or hidden by the whiteout or directory's opaque-ness, then the operation will fail. Because it breaks the consistency in aufs world. .Bu If a "opaque"-ed directory exists in the ancestors of the file and it would hide the moved-down file, then the operation will fail. Similary if a whiteout (in effect) exists on any branch betwee source and destination branch and it would hide the moved-down file, then the operation will fail too. Note that the whiteout on "=ro" branch is not in effect, but "=ro+wh" branch. .Bu Files in use (both in aufs world and in branch filesystem) are not be handled. .Bu Hard-linked files are not be handled. For such file, remount aufs or use \fBauplink\fP(8) or \fBaubrsync\fP(8). .Bu Removed files are not be handled since it has no name. .Bu Directories are not be handled since the action doesn't behave recursively and the moving-down an empty dir has less meaning. .RE .\" ---------------------------------------------------------------------- .SH OPTIONS . .TP .B \-b | \-\-lower\-branch\-id .B \-B | \-\-upper\-branch\-id Specify the source/target branch by its id. You can obtain the branch id via sysfs. . .TP .B \-i | \-\-interactive Prompt to the user for every given file. . 
.TP .B \-k | \-\-keep\-upper Don't unlink the upper source file after copying-down. . .TP .B \-o | \-\-overwrite\-lower Overwrite the lower target file if it exists. . .TP .B \-r | \-\-allow\-ro\-lower Do the operation even if the lower branch is marked as readonly in aufs world ("=ro"). Note that the natively readonly filesystems (mounted with "\-o ro") are not operatable still even if you specify this option. . .TP .B \-R | \-\-allow\-ro\-upper Do the operation even if the upper branch is marked as readonly in aufs world ("=ro"). Note that the natively readonly filesystems (mounted with "\-o ro") are not operatable still even if you specify this option. . .TP .B \-v | \-\-verbose Make it verbose particularly for the error cases. . .TP .B \-h | \-\-help Shows the command syntax. .\" . .\" .TP .\" .B \-V | \-\-version .\" ---------------------------------------------------------------------- .SH SEE ALSO .\" \fBaubrsync\fP(8) \fBaufs\fP(5) .SH COPYRIGHT Copyright \(co 2011\-2015 Junjiro R. Okajima .SH AUTHOR Junjiro R. Okajima aumvdown.c000066400000000000000000000135031315652647700131040ustar00rootroot00000000000000/* * Copyright (C) 2011-2017 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #ifndef __GNU_LIBRARY__ /* musl libc conflicts with */ #undef NGROUPS_MAX #endif #include #include "au_util.h" enum { INTERACTIVE = 1, VERBOSE = (1 << 1), }; static struct option opts[] __attribute__((unused)) = { {"lower-branch-id", required_argument, NULL, 'b'}, {"upper-branch-id", required_argument, NULL, 'B'}, {"interactive", no_argument, NULL, 'i'}, {"keep-upper", no_argument, NULL, 'k'}, {"overwrite-lower", no_argument, NULL, 'o'}, {"allow-ro-lower", no_argument, NULL, 'r'}, {"allow-ro-upper", no_argument, NULL, 'R'}, {"verbose", no_argument, NULL, 'v'}, {"version", no_argument, NULL, 'V'}, {"help", no_argument, NULL, 'h'}, /* hidden */ {"dmsg", no_argument, NULL, 'd'}, {"stfs", no_argument, NULL, 's'}, {NULL, no_argument, NULL, 0} }; #define OPTS_FORM "b:B:ikorRvVh" "ds" static __attribute__((unused)) void usage(void) { fprintf(stderr, "usage: %s [options] file ...\n" "move-down the specified file (an opposite action of copy-up)\n" "from the highest branch where the file exist to the next\n" "lower writable branch.\n" "options:\n" "-b | --lower-branch-id brid\n" "-B | --upper-branch-id brid\n" "-i | --interactive\n" "-k | --keep-upper\n" "-o | --overwrite-lower\n" "-r | --allow-ro-lower\n" "-R | --allow-ro-upper\n" "-v | --verbose\n" "-V | --version\n" AuVersion "\n", program_invocation_short_name); } static __attribute__((unused)) long cvt(char *str) { long ret; errno = 0; ret = strtol(str, NULL, 10); if ((ret == LONG_MAX || ret == LONG_MIN) && errno) ret = -1; return ret; } static __attribute__((unused)) void pr_stbr(struct aufs_stbr *stbr) { printf("b%d %d%%(%llu/%llu), %d%%(%llu/%llu) free\n", stbr->bindex, (int)(stbr->stfs.f_bavail * 100.0 / 
stbr->stfs.f_blocks), (unsigned long long)stbr->stfs.f_bavail, (unsigned long long)stbr->stfs.f_blocks, (int)(stbr->stfs.f_ffree * 100.0 / stbr->stfs.f_files), (unsigned long long)stbr->stfs.f_ffree, (unsigned long long)stbr->stfs.f_files); } #define AuMvDownFin(mvdown, str) do { \ static int e; \ static char a[1024]; \ e = errno; \ snprintf(a, sizeof(a), "%s:%d: %s", \ __FILE__, __LINE__, str); \ errno = e; \ au_errno = (mvdown)->au_errno; \ au_perror(a); \ if (errno) \ exit(errno); \ } while (0) int main(int argc, char *argv[]) { int err, fd, i, c; unsigned int user_flags; struct aufs_mvdown mvdown = { .flags = 0 }; err = 0; user_flags = 0; i = 0; while ((c = getopt_long(argc, argv, OPTS_FORM, opts, &i)) != -1) { switch (c) { case 'b': err = cvt(optarg); if (err < 0) { perror(optarg); goto out; } mvdown.flags |= AUFS_MVDOWN_BRID_LOWER; mvdown.stbr[AUFS_MVDOWN_LOWER].brid = err; break; case 'B': err = cvt(optarg); if (err < 0) { perror(optarg); goto out; } mvdown.flags |= AUFS_MVDOWN_BRID_UPPER; mvdown.stbr[AUFS_MVDOWN_UPPER].brid = err; break; case 'i': user_flags |= INTERACTIVE; break; case 'k': mvdown.flags |= AUFS_MVDOWN_KUPPER; break; case 'o': mvdown.flags |= AUFS_MVDOWN_OWLOWER; break; case 'r': mvdown.flags |= AUFS_MVDOWN_ROLOWER; break; case 'R': mvdown.flags |= AUFS_MVDOWN_ROUPPER; break; case 'v': user_flags |= VERBOSE; break; case 'V': fprintf(stderr, AuVersion "\n"); goto out; /* hidden */ case 'd': mvdown.flags |= AUFS_MVDOWN_DMSG; break; case 's': mvdown.flags |= AUFS_MVDOWN_STFS; break; case 'h': default: usage(); goto out; } } err = EINVAL; if (optind == argc) { usage(); goto out; } for (i = optind; i < argc; i++) { if (user_flags & INTERACTIVE) { fprintf(stderr, "move down '%s'? 
", argv[i]); fflush(stderr); c = fgetc(stdin); c = toupper(c); if (c != 'Y') continue; } fd = open(argv[i], O_RDONLY); if (fd < 0) AuMvDownFin(&mvdown, argv[i]); err = ioctl(fd, AUFS_CTL_MVDOWN, &mvdown); if (err) AuMvDownFin(&mvdown, argv[i]); if (user_flags & VERBOSE) { char *u = "", *l = ""; if (mvdown.flags & AUFS_MVDOWN_ROLOWER_R) l = "(RO)"; if (mvdown.flags & AUFS_MVDOWN_ROUPPER_R) u = "(RO)"; printf("'%s' b%d(brid%d)%s --> b%d(brid%d)%s\n", argv[i], mvdown.stbr[AUFS_MVDOWN_UPPER].bindex, mvdown.stbr[AUFS_MVDOWN_UPPER].brid, u, mvdown.stbr[AUFS_MVDOWN_LOWER].bindex, mvdown.stbr[AUFS_MVDOWN_LOWER].brid, l); if (mvdown.flags & AUFS_MVDOWN_STFS) { if (!(mvdown.flags & AUFS_MVDOWN_STFS_FAILED)) { pr_stbr(mvdown.stbr + AUFS_MVDOWN_UPPER); pr_stbr(mvdown.stbr + AUFS_MVDOWN_LOWER); } else { fprintf(stderr, "STFS failed, ignored\n"); fflush(stderr); } } } err = close(fd); if (err) AuMvDownFin(&mvdown, argv[i]); } out: return err; } auplink.c000066400000000000000000000033771315652647700127170ustar00rootroot00000000000000/* * Copyright (C) 2005-2015 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include "au_util.h" static void usage(char *me) { fprintf(stderr, "usage: %s aufs_mount_point list|cpup|flush\n" "'list' shows the pseudo-linked inode numbers and filenames.\n" "'cpup' copies-up all pseudo-link to the writeble branch.\n" "'flush' calls 'cpup', and then 'mount -o remount,clean_plink=inum'\n" "and remove the whiteouted plink.\n" AuVersion "\n", me); exit(EINVAL); } int main(int argc, char *argv[]) { int err, cmd; char *cwd; if (argc != 3) usage(argv[0]); if (!strcmp(argv[2], "flush")) cmd = AuPlink_FLUSH; else if (!strcmp(argv[2], "list")) cmd = AuPlink_LIST; else if (!strcmp(argv[2], "cpup")) cmd = AuPlink_CPUP; else { errno = EINVAL; AuFin("%s", argv[2]); cmd = 0; /* never reach here */ } err = chdir(argv[1]); if (err) AuFin("chdir"); cwd = getcwd(NULL, 0); /* glibc */ if (!cwd) AuFin("getcwd"); return au_plink(cwd, cmd, AuPlinkFlag_OPEN, /*fd*/NULL); } auplink_ftw000077500000000000000000000024101315652647700133440ustar00rootroot00000000000000#!/bin/sh # Copyright (C) 2016-2017 Junjiro R. Okajima # # This program, aufs is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # usage: $0 inum_list dir list|cpup tmp=/tmp/$$ set -eu rc=${DebugRc:-/etc/default/aufs} . $rc inum=$1 dir=$2 action=$3 # build the grep pattern sed -e 's/^/^/' $inum > $tmp.inum Find() { find $dir -xdev -name $AUFS_WH_PLINKDIR -prune \ -o -printf "%i %p\0" | #2> /dev/null | grep -z -w -f $tmp.inum | sed -e 's/^[0-9][0-9]* //g' -e 's/\x00[0-9][0-9]* /\x00/g' } err=0 case $3 in list) Find | tr '\0' '\n' ;; cpup) Find | xargs -r0 touch -ac ;; *) echo Usage err=1 ;; esac rm -fr $tmp $tmp.* exit $err br.c000066400000000000000000000047131315652647700116520ustar00rootroot00000000000000/* * Copyright (C) 2005-2015 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include "au_util.h" int au_br(union aufs_brinfo **brinfo, int *nbr, char *root) { int err, fd; struct statfs stfs; fd = open(root, O_RDONLY /* | O_PATH */); if (fd < 0) AuFin("%s", root); err = fstatfs(fd, &stfs); if (err) AuFin("internal error, %s", root); if (stfs.f_type != AUFS_SUPER_MAGIC) AuFin("%s is not aufs", root); *nbr = ioctl(fd, AUFS_CTL_BRINFO, NULL); if (*nbr <= 0) AuFin("internal error, %s", root); errno = posix_memalign((void **)brinfo, 4096, *nbr * sizeof(**brinfo)); if (errno) AuFin("posix_memalign"); err = ioctl(fd, AUFS_CTL_BRINFO, *brinfo); if (err) AuFin("AUFS_CTL_BRINFO"); err = close(fd); if (err) AuFin("internal error, %s", root); return 0; } #ifdef AUFHSM int au_nfhsm(int nbr, union aufs_brinfo *brinfo) { int nfhsm, i; nfhsm = 0; for (i = 0; i < nbr; i++) if (au_br_fhsm(brinfo[i].perm)) nfhsm++; return nfhsm; } int au_br_qsort_path(const void *_a, const void *_b) { const union aufs_brinfo *a = _a, *b = _b; return strcmp(a->path, b->path); } void au_br_sort_path(int nbr, union aufs_brinfo *brinfo) { qsort(brinfo, nbr, sizeof(*brinfo), au_br_qsort_path); } int au_br_bsearch_path(const void *_path, const void *_brinfo) { char *path = (char *)_path; const union aufs_brinfo *brinfo = _brinfo; return strcmp(path, brinfo->path); } union aufs_brinfo *au_br_search_path(char *path, int nbr, union aufs_brinfo *brinfo) { return bsearch((void *)path, brinfo, nbr, sizeof(*brinfo), au_br_bsearch_path); } #endif c2sh.c000066400000000000000000000025271315652647700121070ustar00rootroot00000000000000/* * Copyright (C) 2005-2015 Junjiro R. 
Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #define AUFS_WH_PFX2 AUFS_WH_PFX AUFS_WH_PFX int main(int argc, char *argv[]) { #define p(m, v, fmt) printf("%s=" fmt "\n", m, v) #define pstr(m) p(#m, m, "%s") #define pint(m) p(#m, m, "%d") pstr(AUFS_VERSION); pint(AUFS_SUPER_MAGIC); printf("AUFS_SUPER_MAGIC_HEX=0x%x\n", AUFS_SUPER_MAGIC); pint(AUFS_ROOT_INO); pstr(AUFS_WH_PFX); pstr(AUFS_WH_PFX2); pint(AUFS_MAX_NAMELEN); pstr(AUFS_WKQ_NAME); pstr(AUFS_WH_DIROPQ); pstr(AUFS_WH_BASE); pstr(AUFS_WH_PLINKDIR); pstr(AUFS_WH_ORPHDIR); //pint(AUFS_BRANCH_MAX); return 0; } c2tmac.c000066400000000000000000000030761315652647700124210ustar00rootroot00000000000000 /* * Copyright (C) 2005-2015 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include "au_util.h" #include "libau.h" int main(int argc, char *argv[]) { #define p(m, v, fmt) printf(".ds %s " fmt "\n", m, v) #define pstr(m) p(#m, m, "%s") #define pint(m) p(#m, m, "%d") pstr(AUFS_VERSION); pstr(AUFS_XINO_FNAME); pstr(AUFS_XINO_DEFPATH); pint(AUFS_DIRWH_DEF); pstr(AUFS_WH_PFX); pint(AUFS_WH_PFX_LEN); pint(AUFS_WH_TMP_LEN); pint(AUFS_MAX_NAMELEN); pstr(AUFS_WKQ_NAME); pstr(AUFS_WH_DIROPQ); pstr(AUFS_WH_BASE); pstr(AUFS_WH_PLINKDIR); pint(AUFS_MFS_DEF_SEC); pint(AUFS_MFS_MAX_SEC); pint(AUFS_RDBLK_DEF); pint(AUFS_RDHASH_DEF); pint(AUFS_RDCACHE_DEF); pint(AUFS_RDCACHE_MAX); pstr(DROPLVL); pstr(DROPLVL1); pstr(DROPLVL1R); pstr(DROPLVL2); pstr(DROPLVL2R); pstr(DROPLVL3); pstr(DROPLVL3R); pstr(LibAuEnv); return 0; } extlib/000077500000000000000000000000001315652647700123655ustar00rootroot00000000000000extlib/glibc/000077500000000000000000000000001315652647700134455ustar00rootroot00000000000000extlib/glibc/au_nftw.c000066400000000000000000000036051315652647700152600ustar00rootroot00000000000000/* * Copyright (C) 2005-2017 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include "au_util.h" static int ia_test(ino_t ino) { int i; ino_t *p; /* todo: hash table */ ia.p = ia.o; p = ia.cur; for (i = 0; i < ia.nino; i++) if (*p++ == ino) return 1; return 0; } int ftw_list(const char *fname, const struct stat *st, int flags, struct FTW *ftw) { if (!strcmp(fname + ftw->base, AUFS_WH_PLINKDIR)) return FTW_SKIP_SUBTREE; if (flags == FTW_D || flags == FTW_DNR) return FTW_CONTINUE; if (ia_test(st->st_ino)) puts(fname); return FTW_CONTINUE; } int ftw_cpup(const char *fname, const struct stat *st, int flags, struct FTW *ftw) { int err; if (!strcmp(fname + ftw->base, AUFS_WH_PLINKDIR)) return FTW_SKIP_SUBTREE; if (flags == FTW_D || flags == FTW_DNR) return FTW_CONTINUE; /* * do nothing but update something harmless in order to make it copyup */ if (ia_test(st->st_ino)) { Dpri("%s\n", fname); if (!S_ISLNK(st->st_mode)) err = chown(fname, -1, -1); else err = lchown(fname, -1, -1); if (err) AuFin("%s", fname); } return FTW_CONTINUE; } extlib/non-glibc/000077500000000000000000000000001315652647700142355ustar00rootroot00000000000000extlib/non-glibc/au_decode_mntpnt.c000066400000000000000000000022201315652647700177050ustar00rootroot00000000000000/* * Copyright (C) 2016-2017 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include "au_util.h" char *au_decode_mntpnt(char *src, char *dst, int len) { char *p = dst; while (*src && len > 0) { if (isgraph(*src)) { *p++ = *src; len--; } else if (len > 4) { sprintf(p, "\\%03o", *src); p += 4; len -= 4; } else return NULL; src++; } if (len > 0) { *p = 0; return dst; } else return NULL; } extlib/non-glibc/au_nftw.c000066400000000000000000000041541315652647700160500ustar00rootroot00000000000000/* * Copyright (C) 2016-2017 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include "au_util.h" /* dummy */ int ftw_list(const char *fname, const struct stat *st, int flags, struct FTW *ftw) { return 0; } /* dummy */ int ftw_cpup(const char *fname, const struct stat *st, int flags, struct FTW *ftw) { return 0; } int au_nftw(const char *dirpath, int (*fn) (const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf), int nopenfd, int flags) { int err, fd, i; mode_t mask; FILE *fp; ino_t *p; char *action, ftw[1024], tmp[] = "/tmp/auplink_ftw.XXXXXX"; mask = umask(S_IRWXG | S_IRWXO); fd = mkstemp(tmp); if (fd < 0) AuFin("mkstemp"); umask(mask); fp = fdopen(fd, "r+"); if (!fp) AuFin("fdopen"); ia.p = ia.o; p = ia.cur; for (i = 0; i < ia.nino; i++) { err = fprintf(fp, "%llu\n", (unsigned long long)*p++); if (err < 0) break; } err = fflush(fp) || ferror(fp); if (err) AuFin("%s", tmp); err = fclose(fp); if (err) AuFin("%s", tmp); action = "list"; if (fn == ftw_cpup) action = "cpup"; else fflush(stdout); /* inode numbers */ i = snprintf(ftw, sizeof(ftw), AUPLINK_FTW_CMD " %s %s %s", tmp, dirpath, action); if (i > sizeof(ftw)) AuFin("snprintf"); err = system(ftw); err = WEXITSTATUS(err); if (err) AuFin("%s", ftw); return err; } extlib/non-glibc/error_at_line.c000066400000000000000000000025111315652647700172240ustar00rootroot00000000000000/* * Copyright (C) 2013-2017 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include "error_at_line.h" /* musl libc has 'program_invocation_name', but doesn't have error_at_line() */ void error_at_line(int status, int errnum, const char *filename, unsigned int linenum, const char *format, ...) { va_list ap; va_start(ap, format); fprintf(stderr, "%s:%s:%d: ", program_invocation_name, filename, linenum); vfprintf(stderr, format, ap); fprintf(stderr, ": %s\n", errnum ? strerror(errnum) : ""); va_end(ap); if (status) exit(status); } extlib/non-glibc/error_at_line.h000066400000000000000000000020341315652647700172310ustar00rootroot00000000000000/* * Copyright (C) 2013-2017 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __ERROR_AT_LINE_H__ #define __ERROR_AT_LINE_H__ #ifdef __GNU_LIBRARY__ #error this is for non-glibc. 
#else void error_at_line(int status, int errnum, const char *filename, unsigned int linenum, const char *format, ...); #endif #endif /* __ERROR_AT_LINE_H__ */ fhsm/000077500000000000000000000000001315652647700120335ustar00rootroot00000000000000fhsm/Makefile000066400000000000000000000043511315652647700134760ustar00rootroot00000000000000 # Copyright (C) 2011-2015 Junjiro R. Okajima # # This program, aufs is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA UsrBin = aufhsm UsrLib = aufhsm-list aufhsmd Man5 = aufs_fhsm.5 Man8 = $(addsuffix .8, ${UsrBin} ${UsrLib}) Man = ${Man5} ${Man8} LibFhsm = libfhsm.a LibFhsmObj = event.o fhsm.o list.o log.o msg.o mvdown.o shm.o LibFhsmHdr = comm.h daemon.h linux-list.h log.h Tgt = ${UsrBin} aufhsmd TgtObj = $(addsuffix .o, ${Tgt}) all: ${Tgt} clean: ${RM} ${Tgt} ${TgtObj} ${LibFhsm} ${LibFhsmObj} *~ install: install_ubin install_ulib install_man ######################################## override CPPFLAGS += -DAUFHSM_LIST_CMD=\"/usr/lib/aufhsm-list\" override CPPFLAGS += -DAUFHSMD_CMD=\"/usr/lib/aufhsmd\" ${LibFhsmObj}: %.o: %.c ${LibFhsmHdr} $(addprefix ../, ${LibUtilHdr}) ${LibFhsm}: $(foreach o, ${LibFhsmObj}, ${LibFhsm}(${o})) .NOTPARALLEL: ${LibFhsm} ${TgtObj}: %.o: %.c ${LibFhsmHdr} $(addprefix ../, ${LibUtilHdr}) ${LibFhsm} #${Tgt}: override LDFLAGS += -static ${Tgt}: override LDFLAGS += -s ${Tgt}: override 
LDLIBS += -L. -lfhsm -L.. -lautil -lrt ######################################## install_ubin: File = ${UsrBin} install_ubin: Tgt = ${DESTDIR}/usr/bin install_ulib: File = ${UsrLib} install_ulib: Tgt = ${DESTDIR}/usr/lib install_ubin install_ulib: ${File} ${INSTALL} -d ${Tgt} ${Install} -m 755 ${File} ${Tgt} install_man5: File = ${Man5} install_man5: Tgt = ${DESTDIR}${ManDir}/man5 install_man8: File = ${Man8} install_man8: Tgt = ${DESTDIR}${ManDir}/man8 install_man5 install_man8: ${File} ${INSTALL} -d ${Tgt} ${Install} -m 644 ${File} ${Tgt} install_man: install_man5 install_man8 -include priv.mk fhsm/aufhsm-list000077500000000000000000000030541315652647700142170ustar00rootroot00000000000000#!/bin/sh # Copyright (C) 2011-2014 Junjiro R. Okajima # # This program, aufs is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # aufs FHSM, the lister # $0 output_dir output_filename # print the files which should be moved-down. # the list is processed in reverse order, ie. bottom to top. # format: set -eu rc=${DebugRc:-/etc/default/aufs} . $rc umask 0 dst=$1/$2 tmp=$1/$$ find . -xdev -type f -links 1 \ ! -name "${AUFS_WH_PFX}*" \ -printf '%A@ %b %P\0' > $dst # find . -xdev -maxdepth 1 -printf '%P\n' | # while read i # do find "$i" -xdev -type f -links 1 \ # ! 
-name "${AUFS_WH_PFX}*" \ # -fprintf '%A@ %b %p\0' "$dst.$i.list" & # done # wait # cat "$dst.*.list" > $dst #tr '\0' '\n' < $dst >> $dbg # Hoping sort(1) may gain performance, without pipes. # todo: for MP systems, develop the parallel-sort in the future sort -nz -k2 $dst > $tmp sort -rsnz $tmp > $dst rm -f $tmp fhsm/aufhsm-list.8000066400000000000000000000030061315652647700143570ustar00rootroot00000000000000.\" .so aufhsm-inc.8 .\" .so an-ext.tmac .\" see groff_man(7) .TH aufhsm-list 8 Linux "Linux Aufs User's Manual" .SH NAME aufhsm\-list \- AUFS File-based Hierarchical Storage Management (FHSM), the lister .\" ---------------------------------------------------------------------- .SH SYNOPSIS .SY aufhsm\-list .RI output_dir .RI output_filename .YS .\" ---------------------------------------------------------------------- .SH DESCRIPTION Generates a filename list in the format of .nf . .fi This command is invoked by aufhsmd(8) internally in order to decide the "move\-down" target files. Since this is a simple shell script (currently), you can re-write and get the better performance if you want. .\" ---------------------------------------------------------------------- .\" .SH OPTIONS .\" ---------------------------------------------------------------------- .\" .SH ERRORS .\" ---------------------------------------------------------------------- .\" .SH ENVIRONMENT .\" ---------------------------------------------------------------------- .\" .SH NOTES .\" ---------------------------------------------------------------------- .\" .SH BUGS .\" ---------------------------------------------------------------------- .\" .SH EXAMPLE .\" ---------------------------------------------------------------------- .SH SEE ALSO .BR aufs_fhsm (5), .BR aufhsm (8), .BR aufhsmd (8), .BR aufs (5) .SH COPYRIGHT Copyright \(co 2011-2015 Junjiro R. Okajima .SH AUTHOR Junjiro R. 
Okajima fhsm/aufhsm.8000066400000000000000000000040771315652647700134170ustar00rootroot00000000000000.\" .so aufhsm-inc.8 .\" .so an-ext.tmac .\" see groff_man(7) .TH aufhsm 8 Linux "Linux Aufs User's Manual" .SH NAME aufhsm \- File-based Hierarchical Storage Management (FHSM) in AUFS, the controller .\" ---------------------------------------------------------------------- .SH SYNOPSIS .SY aufhsm .RI [ option ] .RI AufsMountPoint .RI [ BranchPath=UPPER\-LOWER .IR .\|.\|. ] .YS .\" ---------------------------------------------------------------------- .SH DESCRIPTION Set the watermarks for AUFS FHSM and control the daemon for it. The mount/umount helper invokes this command internally to control the daemon. So users don't have to invoke it generally. But users may want to use this command to set the watermarks. .\" ---------------------------------------------------------------------- .SH OPTIONS .TP \-i, \-\-inode .B BranchPath=UPPER\-LOWER parameter represents the value for the number of inodes instead of blocks. . .TP \-d DIR, \-\-dir DIR use DIR to store the aufhsm file-list . .TP \-r, \-\-recreate Unlink the existing internal files and newly re-create them. . .TP \-k, \-\-kill Terminates the daemon (aufhsmd(8)). . .TP \-q, \-\-quiet Does not print the current watermarks. .\" . .\" .TP .\" \-v, \-\-verbose .\" . .\" .TP .\" \-V, \-\-version . .TP \-h, \-\-help .\" ---------------------------------------------------------------------- .\" .SH ERRORS .\" ---------------------------------------------------------------------- .SH ENVIRONMENT .TP AUFHSM_LIST_DIR Has the same effect as the \-d option. When both are given, only the \-d option takes effect.
.\" ---------------------------------------------------------------------- .\" .SH NOTES .\" ---------------------------------------------------------------------- .\" .SH BUGS .\" ---------------------------------------------------------------------- .SH EXAMPLE .\" ---------------------------------------------------------------------- .SH SEE ALSO .BR aufs_fhsm (5), .BR aufhsmd (8), .BR aufhsm-list (8), .BR aumvdown (8), .BR aufs (5) .SH COPYRIGHT Copyright \(co 2011-2015 Junjiro R. Okajima .SH AUTHOR Junjiro R. Okajima fhsm/aufhsm.c000066400000000000000000000173571315652647700134770ustar00rootroot00000000000000/* * Copyright (C) 2011-2015 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * aufs FHSM, the controller */ #include #include #include #include #include #include #include #include #include #include "../au_util.h" #include "comm.h" #include "log.h" static void usage(void) { fprintf(stderr, "Usage: %s [ options ] aufs_mount_point " "[ BranchPath=UPPER-LOWER ... 
] | [ UPPER-LOWER ]\n" "\t-i | --inode\t\tset watermark for inode\n" "\t-d DIR | --dir DIR\t\tuse DIR to store the aufhsm file-list\n" "\t-r | --recreate\t\trecreate the data file\n" "\t-k | --kill\t\tkill aufhsmd\n" "\t-q | --quiet\n" "\t-v | --verbose\n" "\t-V | --version\n" "\t-h | --help\n" "Instead of '-d', you can use AUFHSM_LIST_DIR\n", program_invocation_short_name); } enum { OptFhsm_INODE, OptFhsm_RECREATE, OptFhsm_KILL, OptFhsm_QUIET, OptFhsm_VERBOSE }; static struct option opts[] = { {"inode", no_argument, NULL, 'i'}, {"dir", required_argument, NULL, 'd'}, {"recreate", no_argument, NULL, 'r'}, {"kill", no_argument, NULL, 'k'}, {"quiet", no_argument, NULL, 'q'}, {"verbose", no_argument, NULL, 'v'}, /* as usual */ {"version", no_argument, NULL, 'V'}, {"help", no_argument, NULL, 'h'}, {NULL, no_argument, NULL, 0} }; static const char short_opts[] = "id:rkqv" "Vh"; static unsigned long optflags; #define opt_set(f, name) (f) |= 1 << OptFhsm_##name #define opt_clr(f, name) (f) &= ~(1 << OptFhsm_##name) #define opt_test(f, name) ((f) & (1 << OptFhsm_##name)) static void opt(int argc, char *argv[]) { int opt, i, err, need_ck, done; char *dir, *p; done = 0; dir = getenv("AUFHSM_LIST_DIR"); for (i = 1; !done && i < argc; i++) { opt = getopt_long(argc, argv, short_opts, opts, NULL); switch (opt) { case -1: done = 1; break; case 'i': opt_set(optflags, INODE); break; case 'd': dir = optarg; break; case 'r': opt_set(optflags, RECREATE); break; case 'k': opt_set(optflags, KILL); break; case 'q': opt_set(optflags, QUIET); break; case 'v': opt_set(optflags, VERBOSE); break; case 'V': printf("%s version %s\n", program_invocation_short_name, AuVersion); exit(0); case 'h': case '?': usage(); exit(0); default: //usage(); exit(EINVAL); } } if (dir) { p = realpath(dir, NULL); if (!p || !*p) AuFin("%s", dir); dir = strdup(p); if (!dir) AuFin("%s", p); need_ck = 1; } else { dir = au_list_dir_def(); if (!dir) AuFin("au_list_dir_def"); need_ck = 0; } err = au_list_dir_set(dir, 
need_ck); if (err) AuFin("au_list_dir_set"); /* unfree dir */ } /* ---------------------------------------------------------------------- */ /* * Set the watermarks to 'wm'. */ static void do_wmark(struct aufhsm_wmark *wm, char *str) { int err; float a[AuFhsm_WM_Last]; err = sscanf(str, "%f-%f", a + AuFhsm_WM_UPPER, a + AuFhsm_WM_LOWER); if (err != 2 || a[AuFhsm_WM_UPPER] > 100 || a[AuFhsm_WM_UPPER] < 0 || a[AuFhsm_WM_LOWER] > 100 || a[AuFhsm_WM_LOWER] < 0 || a[AuFhsm_WM_UPPER] < a[AuFhsm_WM_LOWER]) { errno = EINVAL; AuFin("%s", str); } /* free ratio */ a[AuFhsm_WM_UPPER] = (100 - a[AuFhsm_WM_UPPER]) / 100; a[AuFhsm_WM_LOWER] = (100 - a[AuFhsm_WM_LOWER]) / 100; if (!opt_test(optflags, INODE)) memcpy(wm->block, a, sizeof(wm->block)); else memcpy(wm->inode, a, sizeof(wm->inode)); } static void wmark(char *str, struct aufhsm *fhsm, int nbr, union aufs_brinfo *brinfo) { int i, nwmark; char *p; struct aufhsm_wmark *wm; wm = NULL; p = strrchr(str, '='); if (p) { *p = '\0'; brinfo = au_br_search_path(str, nbr, brinfo); if (brinfo) { wm = au_wm_lfind(brinfo->id, fhsm->wmark, fhsm->nwmark); if (wm) do_wmark(wm, p + 1); } else { errno = 0; AuFin("no such branch, %s", str); } } else { nwmark = fhsm->nwmark; wm = fhsm->wmark; do_wmark(wm, str); if (!opt_test(optflags, INODE)) for (i = 1; i < nwmark; i++) memcpy(wm[i].block, wm[i - 1].block, sizeof(wm->block)); else for (i = 1; i < nwmark; i++) memcpy(wm[i].inode, wm[i - 1].inode, sizeof(wm->inode)); } } /* ---------------------------------------------------------------------- */ static int au_run_fhsmd(char *mntpnt, int verbose) { int err, waited, status, i; char *av[6]; pid_t pid; i = 0; av[i++] = basename(AUFHSMD_CMD); av[i++] = "--dir"; av[i++] = au_list_dir(); if (verbose) av[i++] = "--verbose"; av[i++] = mntpnt; av[i] = NULL; assert(i < sizeof(av) / sizeof(*av)); pid = fork(); if (!pid) { #if 0 int i; for (i = 0; av[i]; i++) puts(av[i]); //return; #endif execve(AUFHSMD_CMD, av, environ); AuFin(AUFHSMD_CMD); } else if (pid 
> 0) { waited = waitpid(pid, &status, 0); if (waited == pid) { err = WEXITSTATUS(status); /* error msgs should be printed by the controller */ } else { /* should not happen */ err = -1; AuLogErr("waitpid"); } } else { err = pid; AuLogErr("fork"); } return err; } int main(int argc, char *argv[]) { int err, nbr, nfhsm, rootfd, i, do_notify, shmfd; struct statfs stfs; char name[32]; struct aufhsm *fhsm; char *mntpnt; union aufs_brinfo *brinfo, *sorted; do_notify = 0; /* better to test the capability? */ if (getuid()) { errno = EPERM; AuFin(NULL); } opt(argc, argv); if (optind == argc) { usage(); errno = EINVAL; AuFin(NULL); } mntpnt = realpath(argv[optind], NULL); if (!mntpnt) AuFin("%s", mntpnt); rootfd = open(mntpnt, O_RDONLY | O_CLOEXEC /* | O_PATH */); if (rootfd < 0) AuFin("%s", mntpnt); err = fstatfs(rootfd, &stfs); if (err) AuFin("%s", mntpnt); if (stfs.f_type != AUFS_SUPER_MAGIC) { errno = EINVAL; AuFin("%s is not aufs (0x%lx)", mntpnt, (long)stfs.f_type); } err = au_shm_name(rootfd, name, sizeof(name)); if (err) AuFin("au_shm_name"); if (opt_test(optflags, RECREATE)) { do_notify = 1; if (shm_unlink(name) && errno != ENOENT) AuWarn("%s, %m", name); } else if (opt_test(optflags, KILL)) { if (au_fhsm_msg(name, AuFhsm_MSG_EXIT, rootfd)) AuWarn("%s, %m", name); err = 0; errno = 0; goto out; } err = au_br(&brinfo, &nbr, mntpnt); if (err) goto out; nfhsm = au_nfhsm(nbr, brinfo); if (nfhsm < 2) { errno = EINVAL; AuFin("few fhsm branches for %s", mntpnt); } /* shmfd will be locked */ err = au_fhsm(name, nfhsm, nbr, brinfo, &shmfd, &fhsm); if (err) { AuWarn("au_fhsm, %m"); goto out; } /* set the watermarks */ sorted = calloc(nbr, sizeof(*brinfo)); if (!sorted) AuFin("calloc"); memcpy(sorted, brinfo, nbr * sizeof(*brinfo)); au_br_sort_path(nbr, sorted); for (i = optind + 1; i < argc; i++) { wmark(argv[i], fhsm, nbr, sorted); do_notify = 1; } if (!opt_test(optflags, QUIET)) au_fhsm_dump(mntpnt, fhsm, brinfo, nbr); free(brinfo); free(sorted); au_fhsm_sign(fhsm); /* shmfd 
will be unlocked */ err = close(shmfd); if (err) { AuWarn("close, %m"); goto out; } if (do_notify) au_fhsm_msg(name, AuFhsm_MSG_READ, /*rootfd*/-1); err = au_run_fhsmd(mntpnt, opt_test(optflags, VERBOSE)); out: return err; } fhsm/aufhsmd.8000066400000000000000000000033341315652647700135560ustar00rootroot00000000000000.\" .so aufhsm-inc.8 .\" .so an-ext.tmac .\" see groff_man(7) .TH aufhsmd 8 Linux "Linux Aufs User's Manual" .SH NAME aufhsmd \- File-based Hierarchical Storage Management (FHSM) in AUFS, the daemon .\" ---------------------------------------------------------------------- .SH SYNOPSIS .SY aufhsmd .RI [ option ] .RI AufsMountPoint .YS .\" ---------------------------------------------------------------------- .SH DESCRIPTION Monitor the notification from aufs and move-down the files from the upper branch to the lower according to the watermarks and branch attributes. Usually the controller, aufhsm(8), invokes and terminates this daemon, so users do not have to run it manually. But if you want, you can. .\" ---------------------------------------------------------------------- .SH OPTIONS .TP \-d DIR, \-\-dir DIR use DIR to store the aufhsm file-list .\" . .\" .TP .\" \-v, \-\-verbose .\" . .\" .TP .\" \-V, \-\-version . .TP \-h, \-\-help .\" ---------------------------------------------------------------------- .SH ERRORS .\" ---------------------------------------------------------------------- .SH ENVIRONMENT .TP AUFHSM_LIST_DIR Has the same effect as the \-d option. When both are given, only the \-d option takes effect.
.\" ---------------------------------------------------------------------- .SH NOTES .\" ---------------------------------------------------------------------- .SH BUGS .\" ---------------------------------------------------------------------- .SH EXAMPLE .\" ---------------------------------------------------------------------- .SH SEE ALSO .BR aufs_fhsm (5), .BR aufhsm (8), .BR aufhsm-list (8), .BR aumvdown (8), .BR aufs (5) .SH COPYRIGHT Copyright \(co 2011-2015 Junjiro R. Okajima .SH AUTHOR Junjiro R. Okajima fhsm/aufhsmd.c000066400000000000000000000112661315652647700136340ustar00rootroot00000000000000/* * Copyright (C) 2011-2015 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * aufs FHSM, the daemon */ #include #include #include #include #include #include "../au_util.h" #include "daemon.h" #include "log.h" struct aufhsmd fhsmd; /* ---------------------------------------------------------------------- */ static void usage(void) { fprintf(stderr, "Usage: %s [ options ] aufs_mount_point\n" "\t-d DIR | --dir DIR\t\tuse DIR to store the aufhsm file-list\n" "\t-v | --verbose\n" "\t-V | --version\n" "\t-h | --help\n" "Instead of '-d', you can use AUFHSM_LIST_DIR\n", program_invocation_short_name); } static struct option opts[] = { {"dir", required_argument, NULL, 'd'}, {"verbose", no_argument, NULL, 'v'}, /* hidden */ {"no-daemon", no_argument, NULL, 'D'}, /* as usual */ {"version", no_argument, NULL, 'V'}, {"help", no_argument, NULL, 'h'}, {NULL, no_argument, NULL, 0} }; static const char short_opts[] = "d:v" "D" "Vh"; static void opt(int argc, char *argv[]) { int opt, i, err, need_ck, done; char *dir, *p; done = 0; dir = getenv("AUFHSM_LIST_DIR"); for (i = 1; !done && i < argc; i++) { opt = getopt_long(argc, argv, short_opts, opts, NULL); switch (opt) { case -1: done = 1; break; case 'd': dir = optarg; break; case 'v': au_opt_set(fhsmd.optflags, VERBOSE); break; case 'D': au_opt_set(fhsmd.optflags, NODAEMON); break; case 'V': printf("%s version %s\n", program_invocation_short_name, AuVersion); exit(0); case 'h': case '?': usage(); exit(0); default: //usage(); exit(EINVAL); } } if (dir) { p = realpath(dir, NULL); if (!p || !*p) AuFin("%s", dir); dir = strdup(p); if (!dir) AuFin("%s", p); need_ck = 1; } else { dir = au_list_dir_def(); if (!dir) AuFin("au_list_dir_def"); need_ck = 0; } err = au_list_dir_set(dir, need_ck); if (err) AuFin("au_list_dir_set"); /* unfree dir */ } /* 
---------------------------------------------------------------------- */ static void shm_names(char *path, struct au_name name[], unsigned int nlen) { int err; char *p; if (path) { err = open(path, O_RDONLY); if (err < 0) AuLogFin("open %s", path); fhsmd.fd[AuFd_ROOT] = err; } err = au_shm_name(fhsmd.fd[AuFd_ROOT], fhsmd.name[AuName_FHSMD].a, nlen); if (err) AuLogFin("au_shm_name"); strcpy(fhsmd.name[AuName_LCOPY].a, fhsmd.name[AuName_FHSMD].a); p = fhsmd.name[AuName_LCOPY].a + sizeof("aufhsm"); memmove(p, p + 1, strlen(p)); /* including the terminator */ } static void comm_fd(char *name, int *fhsmfd, int *msgfd) { *fhsmfd = ioctl(fhsmd.fd[AuFd_ROOT], AUFS_CTL_FHSM_FD, O_CLOEXEC | O_NONBLOCK); if (*fhsmfd < 0) AuLogFin("AUFS_CTL_FHSM_FD"); *msgfd = au_fhsm_msg(name, AuFhsm_MSG_NONE, /*rootfd*/-1); if (*msgfd < 0) AuLogFin("msg %s", name); } /* ---------------------------------------------------------------------- */ int main(int argc, char *argv[]) { int err; char *mntpnt; opt(argc, argv); if (optind == argc) { //usage(); errno = EINVAL; AuFin(NULL); } mntpnt = realpath(argv[optind], NULL); if (!mntpnt) AuFin("%s", mntpnt); err = chdir(mntpnt); if (err) AuFin("%s", mntpnt); if (!au_opt_test(fhsmd.optflags, NODAEMON)) { au_do_syslog = 1; openlog(program_invocation_short_name, AuFhsmd_OPTION, AuFhsmd_FACILITY); err = daemon(/*nochdir*/0, /*noclose*/0); if (err) AuFin("daemon"); } shm_names(mntpnt, fhsmd.name, sizeof(*fhsmd.name)); free(mntpnt); comm_fd(fhsmd.name[AuName_LCOPY].a, &fhsmd.fd[AuFd_FHSM], &fhsmd.fd[AuFd_MSG]); INIT_LIST_HEAD(&fhsmd.in_ope); err = au_fhsmd_load(); if (err) AuLogFin("au_fhsmd_load"); err = au_epsigfd(); if (err) AuLogFin("create_epsig"); err = au_ep_add(fhsmd.fd[AuFd_MSG], EPOLLIN | EPOLLPRI); if (err) AuLogFin("au_ep_add"); err = au_ep_add(fhsmd.fd[AuFd_FHSM], EPOLLIN | EPOLLPRI); if (err) AuLogFin("au_ep_add"); /* main loop */ err = au_fhsmd_loop(); AuDbgFhsmLog("exit %d", err); return err; } 
fhsm/aufs_fhsm.5000066400000000000000000000330341315652647700140770ustar00rootroot00000000000000.eo .de TQ .br .ns .TP \$1 .. .de Bu .IP \(bu 4 .. .ec .\" end of macro definitions . .\" ---------------------------------------------------------------------- .TH aufs_fhsm 5 Linux "Linux Aufs User's Manual" .SH NAME aufs_fhsm \- AUFS File\-based Hierarchical Storage Management (FHSM) .\" ---------------------------------------------------------------------- .\" .SH DESCRIPTION Hierarchical Storage Management (or HSM) is a well\-known feature in the storage world. Aufs provides this feature as file\-based with multiple writable branches, based upon the principle of "Colder\-Lower". Here the word "colder" refers to the less used files, and "lower" refers to the position in the order of the stacked branches. These multiple writable branches are prioritized, ie. the topmost one should be the fastest drive and be used heavily. .\" ---------------------------------------------------------------------- .SH the Controller .SS What the controller does .RS .Bu create a POSIX shared memory and store the user\-specified watermarks in it. .Bu control the life of the daemon. .Bu make the daemon re\-read the watermarks. .RE .SS Shared memory and protection The controller allocates the POSIX shared memory (under /dev/shm generally which is decided by system) and initializes it by setting the default watermarks. User can change the watermarks by invoking the controller manually. Since the controller can be invoked anytime, it may happen that multiple instances run concurrently. To protect the watermarks in the shared memory from concurrent modification, the controller sets fcntl(F_SETLKW) to the shared memory. This lock also protects the watermarks from the daemon which reads them, ie. it prevents the daemon from reading them while the controller is modifying them.
.\" ---------------------------------------------------------------------- .SH the Daemon .SS What the daemon does .RS .Bu invoked by the controller at aufs mount\-time. .Bu read the user\-specified watermarks stored in POSIX shared memory, and keep a copy of it internally. .Bu establish the two communications. One for aufs and the other is for the controller. .RS .Bu use a named pipe (under the same dir to POSIX shared memory) to communicate with the controller. .Bu the controller may not exist at that time, but it may appear and tell the daemon something later. .Bu use a special file descriptor (created by aufs ioctl(2)) to receive a notification from aufs. .RE .Bu create epollfd and signalfd too in order to monitor the notifications from user, aufs and the controller. .Bu main loop, monitor these three file descriptors. .RS .Bu signal is sent (to signalfd, generally from user). --> exit. .Bu notified from the controller. --> take an action according to the message, eg, read the watermarks again or exit. .Bu notified from aufs. --> run the move\-down operation. .RE .RE .SS Signal handling Generally speaking, users should not send SIGKILL to any process easily. That is the final resort to force terminating the process, and SIGTERM is preferable in most cases. The daemon handles these signals. .TP .B SIGINT .TQ .B SIGQUIT .TQ .B SIGTERM Exit naturally. .TP .B SIGHUP This has no meaning. It is handled just because many other generic daemons handle it to re\-read their configuration. For this daemon, such configuration is done via the controller and SIGHUP is not really necessary. But users who might not read the documents about this daemon may try sending SIGHUP blindly, expecting to make the daemon refresh some configuration. It is totally wrong actually, but the daemon will allow such users and simply ignore SIGHUP. Otherwise the signal will terminate the process.
.TP .B SIGCHLD While a child process invoked by the daemon is running, the daemon handles SIGCHLD too in order not to make the child zombie. All other signals are not handled and will take the default actions. .SS Messages from the controller The controller controls the life of daemon. It invokes and terminates the daemon. For invoking, the controller simply runs the daemon. If the daemon for the same aufs mount\-point is already running, then the daemon detects it by itself (told by aufs, actually) and exits. So a single aufs mount\-point will never have multiple corresponding daemons. For terminating, the controller opens a fifo to communicate with the daemon, and sends a certain message. Then the daemon exits successfully. By remounting aufs mount\-point and adding/deleting its member branches, the number of watermarks will change and the daemon should track it. So there is a message to make the daemon re\-read the watermarks. All these messages are sent from the controller automagically when it is invoked by /sbin/mount.aufs. But user can send the messages anytime he wants by running the controller manually. .SS Notification from aufs (kernel\-space) When user issues write(2) to a file in aufs, aufs processes the request as usual, and tells the daemon the news via the special file descriptor. The special file descriptor is created based upon the ioctl(2) from the daemon. If it is not created (eg. the daemon is not running), aufs doesn't make the notification. Aufs can detect the simple write to the branch fs, but cannot detect the complicated one which is "mmap + fixing a hole". In this case, the notification is not sent. If user places the XINO files on the writable branch (which is put on the first writable branch by default), the notification is not sent when the size of XINO files grows either. When you use FHSM feature, it is recommended to specify the path of XINO as outside of aufs.
If user modifies a file on the writable branch directly (bypassing aufs) and the size of the file grows, then the notification is not sent either. (Someday in the future, aufs may provide another feature to support this case) .\" ---------------------------------------------------------------------- .SH the Lister .SS The list of filenames which should be moved\-down The daemon runs the external list\-command (the lister, which is find(1) and sort(1) currently) in order to get the filename list. The list is sorted by the timestamp (atime) and consumed blocks by the file. This sort decides the order/priority of files to be moved\-down. First, the most unused file should be moved\-down. This is decided by the timestamp (atime) of the file. When multiple files have the same atime, then they are sorted again by the consumed blocks, which means the larger file will be moved\-down earlier. It means users should not specify "noatime" mount option for the aufs branch. "relatime" (the linux default) will be OK, but it may lead us to a rough (the precision may not be high) decision. "strictatime" will be best. Note that "strictatime" may be costly due to its frequent update of atime. My general recommendation is "relatime" (the linux default). .SS Caution The scan to get the file list may be costly. It will be equivalent to "find /branch/fs \-ls" and sort. Additionally the size of the list may be huge. .SS Suggested solutions Since the lister is an external command, user can customize it easily. For example, when the disk where the branch fs resides is RAID (or something) and can endure the multiple find(1), then we may invoke find(1) for every first level entries in the branch. For example, .nf $ find . -maxdepth 1 -printf '%P\\n' | > while read i > do find $i ...conditions... -fprintf 'format' /tmp/$i.list & > done $ wait .fi This approach will be effective when the disk drive is fast enough and allows multiple find(1). And it may be better to have multiple CPUs.
For sorting, which is also a CPU eater when the list is large, it is a good idea to develop a new multi\-threaded sort command if user have multiple CPUs. .SS Which file to be moved\-down Currently we handle the single\-linked and not\-in\-use regular files only. The directories, special files, etc are not moved\-down. The hard\-linked (pseudo-link in aufs) files (whose link count is more than one) are not moved\-down either. .\" ---------------------------------------------------------------------- .SH Move\-down operation .SS What the move\-down does .RS .Bu the daemon receives a notification from aufs. .Bu compare the ratio of consumed blocks and inodes with the user\-specified watermarks (in local\-copy). if it doesn't exceed, the operation doesn't start. . .Bu fork a child process to process the branch which exceed the watermark. .Bu get the writable branch root by special aufs ioctl(2). (the file descriptor may be got by simple open(2). but the branch may be hidden from userspace, so it is better to ask aufs) .Bu fchdir(2) to the writable branch root and run the external lister to get the sorted filename list to move\-down. .Bu pick a single filename from the list. .Bu open the file and issue a special aufs ioctl(2) to it (the body of move\-down). .Bu if it succeeds, then try next filename and continue until reaching the lower watermark or the end of the list. Or the destination (the next lower writable branch) may exceed the upper watermark. .Bu if the move\-down ioctl(2) returns an error, we need to handle it according to its reason. (see below) .Bu additionally, during the child process is running the loop of processing the file list, the request to terminate the daemon may arrive from the controller or user. we also need to handle it. .RE .SS Move\-down one by one The child process (of the daemon) reads the list and move\-down the file one by one. 
If a recoverable error happens in the move\-down operation, the filename is appended to another list file in order to retry later. By moving\-down, the consumed blocks/index on the target branch may exceed the watermark. If it happens, the child stops processing the current branch, tells its parent daemon to proceed to the target branch, and exits. The parent daemon receives the notification from its child, and forks a new child process to process the next (told as 'target') branch. It may happen recursively. And if the specified watermarks are very narrow (the range between upper and lower), it may also happen that repeated fork/exit. But I don't think it a problem (currently). When all the filenames in the list generated by the external command (the lister) are handled and a new notification arrived from aufs, the daemon recreates the list file. .SS Turn\-over The daemon begins the move\-down operation, and it ends when any of these things happen. .RS .Bu reach the lower watermark. .Bu reach the end of the list. .Bu the next lower writable branch exceeds the upper watermark. .Bu requested by user. .RE In all cases, the filename\-list remains. And the daemon in next turn begins with this list. But this time, there may exist the failed\-filename\-list which is generated by the previous turn (by EBUSY or ENOMEM). They are the filenames to be moved\-down still. So the daemon concatenates the filename\-list and the failed\-filename\-list before starting the move\-down operation, and makes the failed\-filename\-list empty. The failed filenames will be appended to the failed\-list again. But it is OK. This concatenation will be skipped when the (original) filename\-list is empty. .SS Supporting the errors The aufs move\-down ioctl(2) returns the various errors, and we should handle them case\-by\-case carefully. .TP .B EBUSY The file to be moved\-down is in\-use currently, and aufs rejects it to proceed.
The filename is appended to the failed\-file\-list and should be tried later. .TP .B EROFS The same named file already exists on the readonly branch which is upper than the next writable branch. For instance, .RS .Bu /aufs = /rw0 + /ro1 + /rw2. .Bu /ro1/fileA exists. .Bu /rw0/fileA exists too. .Bu /rw0 becomes nearly full (exceeds the upper watermark), and the move\-down begins. .Bu the daemon finds /aufs/fileA and requests aufs to move it down to rw2. .Bu aufs finds /ro1/fileA and rejects the operation. .RE In this case, the file should not be tried anymore. The filename is simply removed from the list, not appended to the failed\-list. .TP .B ENOENT The filename existed when the lister ran, but it is removed later (before being moved\-down). The file should not be tried anymore. .TP .B EPERM .TQ .B EFAULT .TQ .B EINVAL .TQ .B EBADF .TQ .B EEXIST (if not forcing) They all mean the internal errors. Let's enjoy debugging. .TP .B ENOMEM A single move\-down operation doesn't require so much memory, but a little. If it happens, then the daemon appends the filename to the failed\-filename\-list. And it will be retried later. .\" ---------------------------------------------------------------------- .\" .SH ENVIRONMENT .\" ---------------------------------------------------------------------- .\" .SH NOTES .\" In autumn in 2011, I have discussed with a few people about how we .\" implement and provide .\" the FHSM feature with comparing the implementation in block\-device layer .\" versus filesystem layer. .\" If we consider FHSM as a sort of caching mechanism, Linux Device Mapper .\" will be a better option. But we want to provide the feature .\" not only caching but also extending the capacity (the filesystem size). .\" And we found it will be unrealistic if we implement it in DM. .\" Finally we decided implementing it as a new feature of AUFS (advanced .\" multi layered unification filesystem).
.\" ---------------------------------------------------------------------- .\" .SH BUGS .\" ---------------------------------------------------------------------- .\" .SH EXAMPLE .\" ---------------------------------------------------------------------- .SH SEE ALSO .BR aufs (5), .BR aumvdown (8), .BR aufhsm (8), .BR aufhsmd (8), .BR aufhsm-list (8) .SH COPYRIGHT Copyright \(co 2011\-2015 Junjiro R. Okajima .SH AUTHOR Junjiro R. Okajima fhsm/comm.h000066400000000000000000000104631315652647700131430ustar00rootroot00000000000000/* * Copyright (C) 2011-2017 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */

/*
 * aufs FHSM, global declarations, common to the controller and the daemon
 */

#ifndef AuFhsm_COMMON_H
#define AuFhsm_COMMON_H

/*
 * NOTE(review): the operands of the bare #include directives below are
 * missing in this copy of the file; restore them from the pristine source.
 */
#include #include /* musl libc */ #include #include

/* fallback definition for C libraries which do not provide O_PATH */
#ifndef O_PATH
#define O_PATH 010000000
#endif

/* signature stored at the head of the shared memory, see au_fhsm_sign() */
#define AUFHSM_MAGIC "AUFHSM"

/*
 * file-local functions are declared with this macro; it expands to nothing
 * when AUFHSM_UT is defined, which makes them externally visible.
 */
#ifdef AUFHSM_UT
#define static_unless_ut
#else
#define static_unless_ut static
#endif

/* ---------------------------------------------------------------------- */

/* the format of single line in the list */
struct au_fname {
	char *atime;
	char *sz;
	char *name;
	int len;
};

/* ---------------------------------------------------------------------- */

/* messages between the controller and daemon */
typedef enum {
	AuFhsm_MSG_NONE,
	AuFhsm_MSG_READ,
	AuFhsm_MSG_EXIT
} aufhsm_msg_t;

/* watermark */
enum {
	AuFhsm_WM_UPPER,
	AuFhsm_WM_LOWER,
	AuFhsm_WM_Last	/* number of watermarks, used as the array size */
};

struct aufhsm_wmark {
	/* branch id */
	int16_t brid;
	/* free ratio */
	float block[AuFhsm_WM_Last];
	float inode[AuFhsm_WM_Last];
};

/* contents of shm */
struct aufhsm {
	char magic[8];		/* AUFHSM_MAGIC, see au_fhsm_sign() */
	unsigned int csum;	/* value of au_fhsm_csum() */
	int nwmark;		/* number of elements in wmark[] */
	struct aufhsm_wmark wmark[0];
};

/* ---------------------------------------------------------------------- */

/* fhsm.c */
union aufs_brinfo;
int au_fhsm(char *name, int nfhsm, int nbr, union aufs_brinfo *brinfo,
	    int *rshmfd, struct aufhsm **rfhsm);
unsigned int au_fhsm_csum(struct aufhsm *fhsm);
void au_fhsm_dump(char *mntpnt, struct aufhsm *fhsm,
		  union aufs_brinfo *brinfo, int nbr);
struct aufhsm *au_fhsm_load(char *name);

/* list.c */
int au_list_dir_set(char *dir, int need_ck);
char *au_list_dir(void);
int au_fname_failed(struct au_fname *fname, int failfd);
void au_fname_one(char *o, off_t len, struct au_fname *fname);
int au_list(int brfd, int *listfd, int *failfd);
char *au_list_dir_def(void);

/* msg.c */
int au_fhsm_msg(char *name, aufhsm_msg_t msg, int rootfd);

/* shm.c */
int au_shm_name(int rootfd, char name[], int sz);
char *au_shm_dir(int fd);
int au_shm_create(char *name, off_t len, int *rfd, void *_p);
int au_shm_map(char *name, int *rfd, void *_p);

/* ---------------------------------------------------------------------- */

/* size in bytes of struct aufhsm followed by @nbr watermark entries */
static inline off_t au_fhsm_size(int nbr)
{
	struct aufhsm *p;

	return sizeof(*p) + nbr * sizeof(*p->wmark);
}

/* store the magic and the current checksum into @fhsm */
static inline void au_fhsm_sign(struct aufhsm *fhsm)
{
	/* magic[] is a fixed-size field compared by strncmp() with its size */
	strncpy(fhsm->magic, AUFHSM_MAGIC, sizeof(fhsm->magic));
	fhsm->csum = au_fhsm_csum(fhsm);
}

/* return non-zero when both the magic and the checksum of @fhsm match */
static inline int au_fhsm_sign_verify(struct aufhsm *fhsm)
{
	return !strncmp(fhsm->magic, AUFHSM_MAGIC, sizeof(fhsm->magic))
		&& fhsm->csum == au_fhsm_csum(fhsm);
}

/* ---------------------------------------------------------------------- */

/* quick sort */
/* qsort(3) comparator, ascending order of the branch id */
static inline int au_wm_qsort_brid(const void *_a, const void *_b)
{
	const struct aufhsm_wmark *a = _a, *b = _b;

	return a->brid - b->brid;
}

/* sort the watermarks of @fhsm in place by the branch id */
static inline void au_fhsm_sort_brid(struct aufhsm *fhsm)
{
	qsort(fhsm->wmark, fhsm->nwmark, sizeof(*fhsm->wmark),
	      au_wm_qsort_brid);
}

/* binary search */
/*
 * bsearch(3) comparator.  the key is not a pointer to the branch id but the
 * branch id itself carried in the pointer value, see au_wm_search_brid().
 */
static inline int au_wm_bsearch_brid(const void *_brid, const void *_wm)
{
	int brid = (long)_brid;
	const struct aufhsm_wmark *wm = _wm;

	return brid - wm->brid;
}

/*
 * find the watermark for @brid by bsearch(3), or NULL.
 * bsearch(3) requires fhsm->wmark[] to be sorted, see au_fhsm_sort_brid().
 */
static inline struct aufhsm_wmark *au_wm_search_brid(int brid,
						     struct aufhsm *fhsm)
{
	long l = brid;

	return bsearch((void *)l, fhsm->wmark, fhsm->nwmark,
		       sizeof(*fhsm->wmark), au_wm_bsearch_brid);
}

/* linear search */
/* find the first of @nwm entries in @wm whose branch id is @brid, or NULL */
static inline struct aufhsm_wmark *au_wm_lfind(int brid,
					       struct aufhsm_wmark *wm, int nwm)
{
	while (nwm-- > 0) {
		if (wm->brid == brid)
			return wm;
		wm++;
	}

	return NULL;
}

#endif /* AuFhsm_COMMON_H */
fhsm/daemon.h000066400000000000000000000051331315652647700134510ustar00rootroot00000000000000/* * Copyright (C) 2011-2015 Junjiro R.
Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * aufs FHSM, theDaemon specific declarations */ #ifndef AuFhsm_DAEMON_H #define AuFhsm_DAEMON_H #include #include "comm.h" #include "linux-list.h" enum { AuName_LCOPY, /* local copy of watermarks */ AuName_FHSMD, AuName_Last }; /* name of POSIX shared memory */ struct au_name { char a[32]; }; /* POSIX shared memory for parent and child */ struct aufhsmd_comm { aufhsm_msg_t msg; int nstbr; struct aufs_stbr stbr[0]; }; enum { AuFd_ROOT, AuFd_FHSM, AuFd_SIGNAL, AuFd_EPOLL, AuFd_MSG, AuFd_Last }; /* * global variables in the daemon. * maintained by parent. readonly for child. 
*/ struct aufhsmd { /* local copy of watermarks (free ratio), sorted by brid */ struct aufhsm *lcopy; struct aufhsmd_comm *comm; int fd[AuFd_Last]; unsigned long optflags; struct au_name name[AuName_Last]; /* in move-down operation */ struct list_head in_ope; }; extern struct aufhsmd fhsmd; struct in_ope { struct list_head list; int16_t brid; pid_t pid; }; /* ---------------------------------------------------------------------- */ /* command line options for the daemon */ enum { OptFhsmd_NODAEMON, OptFhsmd_VERBOSE }; #define au_opt_set(f, name) (f) |= 1 << OptFhsmd_##name #define au_opt_clr(f, name) (f) &= ~(1 << OptFhsmd_##name) #define au_opt_test(f, name) ((f) & (1 << OptFhsmd_##name)) /* ---------------------------------------------------------------------- */ /* event.c */ int au_fhsmd_load(void); int au_epsigfd(void); int au_ep_add(int fd, uint32_t event); int au_fhsmd_loop(void); /* mvdown.c */ int au_mvdown_run(struct aufs_stbr *cur, struct aufs_stbr **next); /* ---------------------------------------------------------------------- */ static inline off_t au_fhsmd_comm_len(int nstbr) { return sizeof(*fhsmd.comm) * sizeof(*fhsmd.comm->stbr) * nstbr; } #endif /* AuFhsm_DAEMON_H */ fhsm/event.c000066400000000000000000000175631315652647700133340ustar00rootroot00000000000000/* * Copyright (C) 2011-2015 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * aufs FHSM, events on the daemon */ #include #include #include #include #include #include #include #include "daemon.h" #include "log.h" /* * make the Daemon's local copy of watermarks and other data sharable */ static_unless_ut struct aufhsmd_comm *fhsmd_comm(void) { struct aufhsmd_comm *p; int err, len, fd; char *name = fhsmd.name[AuName_FHSMD].a; len = au_fhsmd_comm_len(fhsmd.lcopy->nwmark); err = au_shm_create(name, len, &fd, &p); if (err) goto out; err = close(fd); if (err) { AuLogErr("close %s", name); goto out_unmap; } err = shm_unlink(name); if (err) { AuLogErr("unlink %s", name); goto out_unmap; } p->nstbr = fhsmd.lcopy->nwmark; /* todo: initial statfs */ goto out; /* success */ out_unmap: err = munmap(p, len); if (err) AuLogErr("%s", name); p = NULL; out: return p; } /* * load the Daemon's local copy of watermarks and other data */ int au_fhsmd_load(void) { int err, len, n; aufhsm_msg_t msg; err = -1; n = 0; if (fhsmd.lcopy) { n = fhsmd.lcopy->nwmark; free(fhsmd.lcopy); } fhsmd.lcopy = au_fhsm_load(fhsmd.name[AuName_LCOPY].a); if (!fhsmd.lcopy) goto out; errno = 0; AuDbgFhsmLog("n %d --> %d", n, fhsmd.lcopy->nwmark); if (n > fhsmd.lcopy->nwmark) { AuLogWarn("unmatching watermarks. 
re-run aufhsm"); goto out; } err = 0; msg = AuFhsm_MSG_NONE; if (fhsmd.comm) { msg = fhsmd.comm->msg; n = fhsmd.comm->nstbr; len = au_fhsmd_comm_len(n); err = munmap(fhsmd.comm, len); if (err) { AuLogErr("munmap"); goto out; } } fhsmd.comm = fhsmd_comm(); if (fhsmd.comm) { fhsmd.comm->msg = msg; goto out; /* success */ } err = -1; out: return err; } /* * read and handle a notification from aufs */ static_unless_ut int handle_fhsm(void) { int err, i, n, nstbr, len, me_again; ssize_t ssz; struct aufs_stbr *stbr, *next, *cur; err = 0; read: stbr = fhsmd.comm->stbr; nstbr = fhsmd.comm->nstbr; len = nstbr * sizeof(*stbr); ssz = read(fhsmd.fd[AuFd_FHSM], stbr, len); AuDbgFhsmLog("ssz %zd", ssz); if (!ssz) goto out; if (ssz > 0) { errno = 0; n = ssz / sizeof(*stbr); assert(ssz == n * sizeof(*stbr)); for (i = 0; !err && i < n; i++, stbr++) { me_again = 0; cur = stbr; while (1) { err = au_mvdown_run(cur, &next); if (err || !next) break; /* inner while-loop */ assert(next != cur); cur = next; me_again = 1; } if (!err && me_again) { /* process the same branch again */ stbr--; i--; } } goto out; } err = -1; switch (errno) { case EMSGSIZE: /* more branches */ err = au_fhsmd_load(); if (!err) goto read; /* again */ break; default: AuLogErr("AuFd_FHSM"); //?? 
} out: return err; } /* ---------------------------------------------------------------------- */ /* * read and handle a message from the controller */ static_unless_ut int handle_msg(int *msg_exit) { int err; ssize_t ssz; aufhsm_msg_t msg; err = 0; *msg_exit = 0; ssz = read(fhsmd.fd[AuFd_MSG], &msg, sizeof(msg)); if (ssz == -1) { if (errno == EAGAIN) goto out; /* message is not sent, shoud not stop */ /* should not happen */ err = -1; AuLogErr("AuFd_MSG"); goto out; } AuDbgFhsmLog("Got message %d", msg); switch (msg) { case AuFhsm_MSG_READ: err = au_fhsmd_load(); break; case AuFhsm_MSG_EXIT: fhsmd.comm->msg = msg; *msg_exit = 1; break; default: /* should not happen */ err = -1; AuLogErr("msg %d", msg); } out: return err; } /* * read and handle a signal from user */ static_unless_ut int handle_sig(int *status) { int err, found; struct signalfd_siginfo ssi; ssize_t ssz; pid_t pid; struct in_ope *in_ope; err = 0; ssz = read(fhsmd.fd[AuFd_SIGNAL], &ssi, sizeof(ssi)); if (ssz == -1) { if (errno == EAGAIN) goto out; /* signal is not sent, shoud not stop */ /* should not happen */ err = -1; AuLogErr("AuFd_SIGNAL"); goto out; } AuDbgFhsmLog("[%d] got signal %u, pid %d, status %d", getpid(), ssi.ssi_signo, ssi.ssi_pid, ssi.ssi_status); err = ssi.ssi_signo; switch (ssi.ssi_signo) { case SIGCHLD: found = 0; list_for_each_entry(in_ope, &fhsmd.in_ope, list) { if (in_ope->pid == ssi.ssi_pid) { list_del(&in_ope->list); free(in_ope); found = 1; break; } } if (!found) AuLogErr("unknown child [%d]", ssi.ssi_pid); pid = waitpid(ssi.ssi_pid, status, 0); if (pid != ssi.ssi_pid) { /* should not happen */ err = -1; AuLogErr("pid %d, %d", pid, ssi.ssi_pid); } /*FALLTHROUGH*/ case SIGHUP: /* simply ignore */ err = 0; break; default: /* signal is sent, should stop */ break; } out: return err; } static_unless_ut int sigfd(void) { int err, i; int sig[] = {SIGHUP, SIGINT, SIGQUIT, SIGTERM, SIGCHLD}; sigset_t mask; sigemptyset(&mask); for (i = 0; i < sizeof(sig) / sizeof(*sig); i++) 
sigaddset(&mask, sig[i]); err = sigprocmask(SIG_BLOCK, &mask, NULL); if (!err) { err = signalfd(-1, &mask, SFD_NONBLOCK | SFD_CLOEXEC); if (err < 0) AuLogErr("signalfd"); } else AuLogErr("sigprocmask"); return err; } int au_ep_add(int fd, uint32_t event) { int err; struct epoll_event ev = { .events = event, .data.fd = fd }; err = epoll_ctl(fhsmd.fd[AuFd_EPOLL], EPOLL_CTL_ADD, fd, &ev); if (err) AuLogErr("EPOLL_CTL_ADD"); return err; } int au_epsigfd(void) { int err; fhsmd.fd[AuFd_SIGNAL] = sigfd(); err = fhsmd.fd[AuFd_SIGNAL]; if (fhsmd.fd[AuFd_SIGNAL] < 0) goto out; fhsmd.fd[AuFd_EPOLL] = epoll_create1(EPOLL_CLOEXEC); err = fhsmd.fd[AuFd_EPOLL]; if (fhsmd.fd[AuFd_EPOLL] < 0) { AuLogErr("epoll_create1"); goto out_sigfd; } err = au_ep_add(fhsmd.fd[AuFd_SIGNAL], EPOLLIN | EPOLLPRI); if (!err) goto out; /* success */ /* revert */ if (close(fhsmd.fd[AuFd_EPOLL])) AuLogErr("close"); out_sigfd: if (close(fhsmd.fd[AuFd_SIGNAL])) AuLogErr("close"); out: return err; } /* ---------------------------------------------------------------------- */ /* * event loop */ #define EVENTS 3 /* fhsmfd, sigfd, and msgfd */ int au_fhsmd_loop(void) { int err, done, nev, i, sig, status, msg_exit; struct epoll_event events[EVENTS]; err = 0; done = 0; while (!done || !list_empty(&fhsmd.in_ope)) { nev = epoll_wait(fhsmd.fd[AuFd_EPOLL], events, EVENTS, -1); if (nev < 0) { if (errno == EINTR) continue; //?? AuLogFin("epoll_wait"); //?? 
break; } for (i = 0; i < nev; i++) { if (events[i].data.fd == fhsmd.fd[AuFd_SIGNAL]) { sig = handle_sig(&status); if (sig == SIGCHLD) { err = WEXITSTATUS(status); if (err) AuLogInfo("child status %d", err); } else if (sig) done = 1; AuDbgFhsmLog("sig %d, done %d", sig, done); } else if (events[i].data.fd == fhsmd.fd[AuFd_MSG]) { err = handle_msg(&msg_exit); if (!err && msg_exit) { done = 1; AuDbgFhsmLog("done %d", done); } } else if (events[i].data.fd == fhsmd.fd[AuFd_FHSM]) { if (!done) { err = handle_fhsm(); if (err) { done = 1; AuDbgFhsmLog("done %d", done); } } } else { errno = ENOSYS; err = errno; AuLogFin("internal error, %p", events[i].data.ptr); } } } return err; } fhsm/fhsm.c000066400000000000000000000174601315652647700131440ustar00rootroot00000000000000/* * Copyright (C) 2011-2015 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * aufs FHSM, core, mainly for the contoller */ #include #include #include #include #include "../au_util.h" #include "comm.h" #include "log.h" static void wm_init(struct aufhsm_wmark *wm, union aufs_brinfo *brinfo) { wm->brid = brinfo->id; /* 75% - 50% by default */ wm->block[AuFhsm_WM_UPPER] = 0.25; wm->block[AuFhsm_WM_LOWER] = 0.5; wm->inode[AuFhsm_WM_UPPER] = 1; wm->inode[AuFhsm_WM_LOWER] = 1; } /* ---------------------------------------------------------------------- */ /* * follow the branch management */ static_unless_ut int fhsm_shrink(int shmfd, struct aufhsm **rfhsm, int nfhsm) { int err; off_t len, cur_len; struct aufhsm *fhsm; fhsm = *rfhsm; cur_len = au_fhsm_size(fhsm->nwmark); len = au_fhsm_size(nfhsm); AuDbgFhsmLog("%lld --> %lld", (long long)cur_len, (long long)len); err = ftruncate(shmfd, len); if (err) { AuLogErr("ftruncate %lld, %lld", (long long)cur_len, (long long)len); goto out; } fhsm = mremap(fhsm, len, cur_len, MREMAP_MAYMOVE); if (fhsm != MAP_FAILED) { *rfhsm = fhsm; fhsm->nwmark = nfhsm; goto out; /* success */ } /* revert */ err = -1; AuLogErr("mremap %lld, %lld", (long long)cur_len, (long long)len); if (ftruncate(shmfd, cur_len)) AuLogErr("ftruncate %lld", (long long)cur_len); out: return err; } static_unless_ut int fhsm_expand(int shmfd, struct aufhsm **rfhsm, int nfhsm) { int err, i; off_t len, cur_len; struct aufhsm *fhsm; struct aufhsm_wmark *wm; fhsm = *rfhsm; cur_len = au_fhsm_size(fhsm->nwmark); len = au_fhsm_size(nfhsm); AuDbgFhsmLog("%lld --> %lld", (long long)cur_len, (long long)len); err = ftruncate(shmfd, len); if (err) { AuLogErr("ftruncate %lld, %lld", (long long)cur_len, (long long)len); goto out; } fhsm = mremap(fhsm, len, cur_len, MREMAP_MAYMOVE); if (fhsm != MAP_FAILED) { *rfhsm = fhsm; wm = fhsm->wmark + 
fhsm->nwmark; for (i = fhsm->nwmark; i < nfhsm; i++, wm++) wm->brid = -1; fhsm->nwmark = nfhsm; goto out; /* success */ } /* revert */ err = -1; AuLogErr("mremap %lld, %lld", (long long)cur_len, (long long)len); if (ftruncate(shmfd, cur_len)) AuLogErr("ftruncate %lld", (long long)cur_len); out: return err; } /* * nfhsm (the number of FHSM paticipant branches) is just a hint since aufs * branches may be changed after we check them. */ static_unless_ut int fhsm_create(char *name, int nfhsm, int nbr, union aufs_brinfo *brinfo, int *rfd, struct aufhsm **rfhsm) { int err, i; off_t len; struct aufhsm_wmark *wm; struct aufhsm *p; len = au_fhsm_size(nfhsm); err = au_shm_create(name, len, rfd, rfhsm); if (err) goto out; p = *rfhsm; p->nwmark = nfhsm; wm = p->wmark; for (i = 0; i < nfhsm; i++, wm++) wm->brid = -1; wm = p->wmark; for (i = 0; i < nbr; i++) if (au_br_fhsm(brinfo[i].perm)) wm_init(wm++, brinfo + i); out: return err; } /* make sure there is no invalid entries */ static_unless_ut int fhsm_invalid(struct aufhsm *fhsm, union aufs_brinfo *brinfo, int nbr) { int err, i, j, nwmark, found; struct aufhsm_wmark *wm; err = 0; nwmark = fhsm->nwmark; wm = fhsm->wmark; for (i = 0; i < nwmark; i++, wm++) { if (wm->brid < 0) { err++; continue; } found = 0; for (j = 0; !found && j < nbr; j++) found = (wm->brid == brinfo[i].id); if (!found) { wm->brid = -1; err++; } } return err; } /* re-initialize the un-initialized entries */ static_unless_ut void fhsm_reinit(struct aufhsm *fhsm, union aufs_brinfo *brinfo, int nbr) { int i, nwmark; struct aufhsm_wmark *wm, *prev; prev = NULL; nwmark = fhsm->nwmark; for (i = 0; i < nbr; i++) { if (!au_br_fhsm(brinfo[i].perm)) continue; if (brinfo[i].id < 0) { /* should not happen */ errno = EINVAL; AuFin("%s", brinfo[i].path); } /* no bsearch, since it is unsorted */ wm = au_wm_lfind(brinfo[i].id, fhsm->wmark, nwmark); if (wm) { prev = wm; continue; } wm = au_wm_lfind(-1, fhsm->wmark, nwmark); if (wm) { wm_init(wm, brinfo + i); if (prev) { 
memcpy(wm->block, prev->block, sizeof(wm->block)); memcpy(wm->inode, prev->inode, sizeof(wm->inode)); } } else { /* should not happen */ errno = EINVAL; AuFin("%s", brinfo[i].path); } } } /* * create struct aufhsm by mapping POSIX shared memory */ static int fhsm_map(char *name, int *rshmfd, struct aufhsm **rfhsm) { int err; struct aufhsm *p; err = au_shm_map(name, rshmfd, rfhsm); if (err) goto out; p = *rfhsm; if (au_fhsm_sign_verify(p)) goto out; err = -1; errno = EINVAL; AuLogErr("%s has broken signature", name); if (munmap(p, au_fhsm_size(p->nwmark))) AuLogErr("%s", name); if (close(*rshmfd)) AuLogErr("%s", name); out: return err; } /* * create struct aufhsm on shared memory from brinfo */ int au_fhsm(char *name, int nfhsm, int nbr, union aufs_brinfo *brinfo, int *rshmfd, struct aufhsm **rfhsm) { int err, invalid; struct aufhsm *p; err = fhsm_create(name, nfhsm, nbr, brinfo, rshmfd, rfhsm); p = *rfhsm; if (!err) goto out_sort; if (errno != EEXIST) goto out; err = fhsm_map(name, rshmfd, rfhsm); if (err) goto out; p = *rfhsm; if (p->nwmark < nfhsm) err = fhsm_expand(*rshmfd, rfhsm, nfhsm); else //if (p->nwmark > nfhsm) err = fhsm_shrink(*rshmfd, rfhsm, nfhsm); if (err) goto out_unmap; p = *rfhsm; invalid = fhsm_invalid(p, brinfo, nfhsm); if (invalid) fhsm_reinit(p, brinfo, nfhsm); out_sort: au_fhsm_sort_brid(p); if (!err) { au_fhsm_sign(p); goto out; /* success */ } out_unmap: if (munmap(p, au_fhsm_size(p->nwmark))) AuLogErr("unmap"); out: return err; } unsigned int au_fhsm_csum(struct aufhsm *fhsm) { unsigned int csum; off_t len; char *p; csum = 0; len = au_fhsm_size(fhsm->nwmark); len -= offsetof(struct aufhsm, nwmark); p = (void *)fhsm; p += offsetof(struct aufhsm, nwmark); for (; len; len--, p++) csum += *p; return csum; } /* ---------------------------------------------------------------------- */ /* * load struct aufhsm from the given shared memory */ struct aufhsm *au_fhsm_load(char *name) { struct aufhsm *fhsm, *p; off_t len; int err, fd; p = NULL; err = 
fhsm_map(name, &fd, &fhsm); if (err) goto out; len = au_fhsm_size(fhsm->nwmark); p = malloc(len); if (!p) { AuLogErr("malloc %d", fhsm->nwmark); goto out_fd; } memcpy(p, fhsm, len); err = munmap(fhsm, len); if (err) { p = NULL; AuLogErr("unmap"); } out_fd: err = close(fd); if (err) { p = NULL; AuLogErr("close"); } out: return p; } /* ---------------------------------------------------------------------- */ void au_fhsm_dump(char *mntpnt, struct aufhsm *fhsm, union aufs_brinfo *brinfo, int nbr) { int i; struct aufhsm_wmark *wm; printf("%s, %d watermark(s)\n", mntpnt, fhsm->nwmark); for (i = 0; i < nbr; i++) { wm = au_wm_search_brid(brinfo[i].id, fhsm); if (wm) printf("%s, %d, %.2f-%.2f %.2f-%.2f\n", brinfo[i].path, brinfo[i].id, (1 - wm->block[AuFhsm_WM_UPPER]) * 100, (1 - wm->block[AuFhsm_WM_LOWER]) * 100, (1 - wm->inode[AuFhsm_WM_UPPER]) * 100, (1 - wm->inode[AuFhsm_WM_LOWER]) * 100); } } fhsm/linux-list.h000066400000000000000000000032661315652647700143230ustar00rootroot00000000000000/* * borrowed from linux/include/linux/list.h */ #ifndef AuFhsm_LIST_H #define AuFhsm_LIST_H #include struct list_head { struct list_head *prev, *next; }; #define LIST_HEAD_INIT(name) { &(name), &(name) } #define LIST_HEAD(name) \ struct list_head name = LIST_HEAD_INIT(name) static inline void INIT_LIST_HEAD(struct list_head *list) { list->next = list; list->prev = list; } static inline void __list_add(struct list_head *new, struct list_head *prev, struct list_head *next) { next->prev = new; new->next = next; new->prev = prev; prev->next = new; } static inline void list_add(struct list_head *new, struct list_head *head) { __list_add(new, head, head->next); } static inline void __list_del(struct list_head * prev, struct list_head * next) { next->prev = prev; prev->next = next; } static inline void __list_del_entry(struct list_head *entry) { __list_del(entry->prev, entry->next); } static inline void list_del(struct list_head *entry) { __list_del(entry->prev, entry->next); entry->next = 
NULL; entry->prev = NULL; } static inline int list_empty(const struct list_head *head) { return head->next == head; } #define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER) #define container_of(ptr, type, member) ({ \ const typeof( ((type *)0)->member ) *__mptr = (ptr); \ (type *)( (char *)__mptr - offsetof(type,member) );}) #define list_entry(ptr, type, member) container_of(ptr, type, member) #define list_for_each_entry(pos, head, member) \ for (pos = list_entry((head)->next, typeof(*pos), member); \ &pos->member != (head); \ pos = list_entry(pos->member.next, typeof(*pos), member)) #endif /* AuFhsm_LIST_H */ fhsm/list.c000066400000000000000000000176301315652647700131610ustar00rootroot00000000000000/* * Copyright (C) 2011-2015 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * aufs FHSM, functions to handle the list */ #include #include #include #include #include #include #include #include "comm.h" #include "log.h" /* * for those who don't want to consume tmpfs, the directory to place several * (possibly large) file-list can be customized. 
*/ static char *list_dir; int au_list_dir_set(char *dir, int need_ck) { int err; struct stat st; errno = EINVAL; err = -1; if (!dir || !*dir) { AuLogErr("internal error, empty string is not allowed"); goto out; } err = 0; if (!need_ck) goto out_set; err = access(dir, R_OK | W_OK | X_OK); if (err) { AuLogErr("%s", dir); goto out; } err = stat(dir, &st); if (err) { AuLogErr("%s", dir); goto out; } if (!S_ISDIR(st.st_mode)) { err = -1; errno = ENOTDIR; AuLogErr("%s", dir); } out_set: list_dir = dir; out: return err; } char *au_list_dir(void) { return list_dir; } char *au_list_dir_def(void) { char *dir, *name = "/aufs-dummy"; int err, fd; dir = NULL; fd = shm_open(name, O_RDWR | O_CREAT | O_CLOEXEC, S_IRUSR | S_IWUSR); if (fd < 0) { AuLogErr("shm_open"); goto out; } dir = au_shm_dir(fd); /* always remove */ err = shm_unlink(name); if (err) { AuLogErr("shm_unlink"); free(dir); dir = NULL; } out: return dir; } /* ---------------------------------------------------------------------- */ /* * Move the contents of failfd to listfd. 
*/ static_unless_ut int move_failed(int listfd, int failfd) { int err, left, l; ssize_t ssz; off_t off; struct stat st; char *o, *src, *rev, *tgt, *succeeded; err = fstat(failfd, &st); if (err) { AuLogErr("fstat"); goto out; } if (!st.st_size) goto out; /* success */ o = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, failfd, 0); if (o == MAP_FAILED) { err = -1; AuLogErr("mmap"); goto out; } rev = malloc(st.st_size + 1); if (!rev) { err = -1; AuLogErr("malloc, %llu", (unsigned long long)st.st_size); goto out_unmap; } tgt = rev; *tgt++ = '\0'; left = st.st_size - 1; src = o; while (left > 0) { src = memrchr(o, '\0', left); if (src) src++; else src = o; l = strlen(src) + 1; /* including the terminator */ memcpy(tgt, src, l); tgt += l; left -= l; } if (tgt != rev + st.st_size + 1) AuLogFin("internal error, tgt %p, rev %p, sz %llu", tgt, rev, (unsigned long long)st.st_size); ssz = write(listfd, rev, st.st_size + 1); if (ssz != st.st_size + 1) goto out_ssz; err = ftruncate(failfd, 0); if (err) /* should not happen */ AuLogErr("ftruncate"); goto out_free; out_ssz: err = -1; AuLogErr("failed moving failfd %llu, %zd", (unsigned long long)st.st_size, ssz); if (ssz > 0) { /* wrote partially */ succeeded = memrchr(rev + ssz, '\0', ssz); off = lseek(failfd, SEEK_END, -(rev + ssz - succeeded)); if (off != -1) { if (ftruncate(listfd, off)) { /* should not happen */ AuLogErr("ftruncate, %llu", (unsigned long long)off); } } else { /* should not happen */ AuLogErr("SEEK_END, %llu", (unsigned long long)(rev + ssz - succeeded)); } } out_free: free(rev); out_unmap: if (munmap(o, st.st_size)) AuLogErr("munmap"); out: return err; } /* * if any signal is sent to the aufhsm-list process and killed, the result * list-file may be incomplete, which leads FHSM to inefficient behaviour. * in this case, the user should remove the list-file under /dev/shm manually. 
*/ static_unless_ut int run_cmd(int brfd, char *dir, char *name) { int err, status; pid_t pid, waited; sigset_t new, old; char *av[] = {basename(AUFHSM_LIST_CMD), dir, name, NULL}; sigemptyset(&new); sigaddset(&new, SIGCHLD); err = sigprocmask(SIG_UNBLOCK, &new, &old); if (err) { AuLogErr("sigprocmask"); goto out; } pid = fork(); if (!pid) { /* child */ err = fchdir(brfd); if (!err) { execve(AUFHSM_LIST_CMD, av, environ); AuLogFin("aufhsm-list"); } else AuLogFin("fchdir"); } else if (pid > 0) { waited = waitpid(pid, &status, 0); if (waited == pid) err = WEXITSTATUS(status); else { /* should not happen */ err = -1; AuLogErr("waitpid"); } } else { err = pid; AuLogErr("fork"); } if (sigprocmask(SIG_BLOCK, &old, NULL)) AuLogErr("sigprocmask"); out: return err; } int au_list(int brfd, int *listfd, int *failfd) { int err, e, l, dirfd; char name[64], *dir; struct stat st; /* while their name contain 'shm', the file is not shared actually */ err = au_shm_name(brfd, name, sizeof(name)); if (err) goto out; dir = au_list_dir(); if (!dir) { AuLogErr("internal error, list dir is empty"); goto out; } dirfd = open(dir, O_RDONLY | O_PATH); if (dirfd < 0) { AuLogErr("%s", dir); goto out; } *listfd = openat(dirfd, name + 1, O_RDWR | O_CREAT | O_APPEND | O_CLOEXEC, S_IRUSR | S_IWUSR); err = *listfd; if (err < 0) { AuLogErr("%s/%s", dir, name + 1); goto out_dirfd; } l = strlen(name); strcpy(name + l, "-failed"); *failfd = openat(dirfd, name + 1, O_RDWR | O_CREAT | O_APPEND | O_CLOEXEC, S_IRUSR | S_IWUSR); err = *failfd; if (err < 0) { AuLogErr("%s/%s", dir, name + 1); goto out_listfd; } /* * if the list generated previously is not processed entirely, * then continue it. * but the previously failed files should be tried first. 
*/ err = fstat(*listfd, &st); if (err) { AuLogErr("fstat"); goto out_failfd; } if (st.st_size) { /* the list-file is not emptry, re-use it */ err = move_failed(*listfd, *failfd); if (!err) goto out; /* success */ goto out_failfd; } /* the list-file is emptry, re-generate it */ name[l] = '\0'; err = run_cmd(brfd, dir, name + 1); if (!err) goto out_dirfd; /* success */ out_failfd: e = errno; if (close(*failfd)) AuLogErr("close"); errno = e; out_listfd: e = errno; if (close(*listfd)) AuLogErr("close"); errno = e; out_dirfd: e = errno; if (close(dirfd)) AuLogErr("close"); errno = e; out: return err; } /* get a single filename (from listfd) */ void au_fname_one(char *o, off_t len, struct au_fname *fname) { char *end; fname->atime = memrchr(o, 0, len - 1); if (fname->atime) { end = o + len; fname->len = end - fname->atime - 1; fname->atime++; } else { fname->len = len; fname->atime = o; } fname->sz = strchr(fname->atime, ' '); if (!fname->sz) AuLogFin("%s", fname->atime); fname->sz++; fname->name = strchr(fname->sz, ' '); if (!fname->name) AuLogFin("%s", fname->atime); fname->name++; } /* store the filename to failfd */ int au_fname_failed(struct au_fname *fname, int failfd) { int err, l, e; ssize_t ssz; off_t off; /* AuDbgFhsmLog("%s", fname->atime); */ err = 0; l = strlen(fname->atime) + 1; /* including the terminator */ ssz = write(failfd, fname->atime, l); if (ssz == l) goto out; /* success */ err = -1; e = errno; AuLogInfo("failed appending %s (%zd), skipped", fname->atime, ssz); if (ssz > 0) { /* wrote partially */ off = lseek(failfd, SEEK_END, -ssz); if (off != -1) { if (ftruncate(failfd, off)) { /* should not happen */ AuLogErr("ftruncate, %llu", (unsigned long long)off); } } else { /* should not happen */ AuLogErr("lseek SEEK_END, %zd", -ssz); } } errno = e; out: return err; } fhsm/log.c000066400000000000000000000015011315652647700127550ustar00rootroot00000000000000/* * Copyright (C) 2011-2015 Junjiro R. 
Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * aufs FHSM, logging control */
/* non-zero: AuDoLog() writes to syslog(3); zero: it writes to stderr via AuWarn() */
int au_do_syslog;
fhsm/log.h000066400000000000000000000047331315652647700127740ustar00rootroot00000000000000/* * Copyright (C) 2011-2015 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * aufs FHSM, logging macros */
#ifndef AuFhsm_LOG_H
#define AuFhsm_LOG_H
#include
#include
#include
#include
#include

/* syslog option and facility values; the facility differs for unit-test builds */
#define AuFhsmd_OPTION	(LOG_NDELAY | LOG_PID)
#ifdef AUFHSM_UT
#define AuFhsmd_FACILITY	LOG_USER
#else
#define AuFhsmd_FACILITY	LOG_DAEMON
#endif

extern int au_do_syslog;

/* All message strings given should not end the NL char */

/*
 * Print "<program>[<pid>]:<function>:<line>: <message>" and a newline to
 * stderr. errno is saved before and restored after the call.
 */
#define AuWarn(fmt, ...)
do { \
	int e = errno; \
	fprintf(stderr, "%s[%d]:%s:%d: " fmt "\n", \
		program_invocation_short_name, getpid(), \
		__func__, __LINE__, ##__VA_ARGS__); \
	errno = e; \
} while (0)

/* AuWarn() in unit-test builds, a no-op otherwise */
#ifdef AUFHSM_UT
#define AuDWarn	AuWarn
#else
#define AuDWarn(fmt, ...)	do {} while (0)
#endif

/*
 * Common back end: syslog(3) at @level when au_do_syslog is set, AuWarn()
 * otherwise. errno is preserved across the call.
 */
#define AuDoLog(level, fmt, ...) do { \
	int e = errno; \
	if (au_do_syslog) { \
		syslog(level, "%s:%d: " fmt, \
		       __func__, __LINE__, ##__VA_ARGS__); \
	} else \
		AuWarn(fmt, ##__VA_ARGS__); \
	errno = e; \
} while (0)

/* error level; ", %m" is appended to the caller's format */
#define AuLogErr(fmt, ...) \
	AuDoLog(LOG_ERR, fmt ", %m", ##__VA_ARGS__)
#define AuLogWarn(fmt, ...) \
	AuDoLog(LOG_WARNING, fmt, ##__VA_ARGS__)
/*
 * Rate-limited warning: logs only while the per-call-site counter is zero,
 * i.e. on the first call and again each time the unsigned char wraps around.
 */
#define AuLogWarn1(fmt, ...) do { \
	static unsigned char cnt; \
	if (!cnt++) \
		AuDoLog(LOG_WARNING, fmt, ##__VA_ARGS__); \
} while (0)
#define AuLogInfo(fmt, ...) \
	AuDoLog(LOG_INFO, fmt, ##__VA_ARGS__)
#define AuLogDbg(fmt, ...) \
	AuDoLog(LOG_DEBUG, fmt, ##__VA_ARGS__)

/* debug logging, compiled in only with AUFHSM_DBG */
#ifdef AUFHSM_DBG
#define AuDbgFhsmLog(fmt, ...)	AuLogDbg(fmt, ##__VA_ARGS__)
#else
#define AuDbgFhsmLog(fmt, ...)	do {} while (0)
#endif

/* log at error level, then terminate the process with EXIT_FAILURE */
#define AuLogFin(fmt, ...) do { \
	AuLogErr(fmt, ##__VA_ARGS__); \
	exit(EXIT_FAILURE); \
} while (0)

#endif /* AuFhsm_LOG_H */
fhsm/msg.c000066400000000000000000000047731315652647700130000ustar00rootroot00000000000000/* * Copyright (C) 2011-2017 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * aufs FHSM, messaging between the controller and the daemon */ #include #include #include #include #include #include #include #include "comm.h" #include "log.h" /* wait for the daemon terminate */ static void wait_for_terminate(int rootfd) { int fhsmfd; struct timespec ts; time_t until; until = time(NULL) + 15; do { fhsmfd = ioctl(rootfd, AUFS_CTL_FHSM_FD, /*oflags*/0); if (fhsmfd > 0) { close(fhsmfd); break; } else if (fhsmfd == -1 && (errno == EOPNOTSUPP || errno == EPERM)) break; ts.tv_sec = 0; ts.tv_nsec = 100 * 1000; nanosleep(&ts, NULL); } while (until > time(NULL)); } /* messaging between the controller and daemon */ int au_fhsm_msg(char *name, aufhsm_msg_t msg, int rootfd) { int err, fd; ssize_t ssz; char a[64], *dir; dir = au_list_dir(); if (!dir) { AuLogErr("internal error, au_list_dir"); err = -1; goto out; } err = snprintf(a, sizeof(a), "%s%s.msg", dir, name); if (err >= sizeof(a)) { AuLogErr("internal error, %d", err); err = -1; goto out; } err = mknod(a, S_IFIFO | S_IRUSR | S_IWUSR, /*dev*/0); if (err && errno != EEXIST) { AuLogErr("%s", a); goto out; } fd = open(a, O_RDWR | O_CLOEXEC | O_NONBLOCK); err = fd; if (fd < 0) { AuLogErr("%s", a); goto out; /* do not unlink the fifo */ } if (msg == AuFhsm_MSG_NONE) goto out; /* success, keep 'fd' opened and return it */ ssz = write(fd, &msg, sizeof(msg)); err = (ssz != sizeof(msg)); if (err) { AuLogErr("%s, %zd", a, ssz); goto out; } if (msg == AuFhsm_MSG_EXIT && rootfd >= 0) wait_for_terminate(rootfd); if (close(fd)) { err = -1; AuLogErr("close"); } /* do not unlink the fifo */ out: return err; } fhsm/mvdown.c000066400000000000000000000173701315652647700135210ustar00rootroot00000000000000/* * Copyright (C) 2011-2015 Junjiro R. 
Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * aufs FHSM, retrieve a filename from the list and move it down one by one */ #include #include #include #include #include #include #include #include "../au_util.h" #include "daemon.h" #include "log.h" /* file global varibles */ static int listfd, failfd; static off_t listsz; static char *listp; static struct aufs_mvdown mvdown; static struct au_fname fname; /* * the return value zero means we should continue the operation, otherwise stop * it. */ static_unless_ut int do_mvdown1(int *done) { int err, fd, e; struct stat st; *done = 1; AuDbgFhsmLog("%s", fname.name); fd = openat(fhsmd.fd[AuFd_ROOT], fname.name, O_RDONLY); if (fd < 0) { err = -1; if (errno == ENOENT) err = 0; AuLogErr("%s", fname.name); goto out; } err = ioctl(fd, AUFS_CTL_MVDOWN, &mvdown); if (!err) { err = close(fd); if (err) AuLogErr("%s", fname.name); goto out; } e = errno; AuDbgFhsmLog("AUFS_CTL_MVDOWN %s, %m", fname.name); errno = e; switch (e) { case EROFS: /* * the same named file exists on the lower (but upper than next * writable) branch. simply skip it and continue the operation * if another error doesn't happen. 
*/ if (au_opt_test(fhsmd.optflags, VERBOSE)) AuLogInfo("ignore %s, %m", fname.name); err = close(fd); if (err) AuLogErr("%s", fname.name); break; case EBUSY: /* * the file is in-use (or other reason), and we could not move * it down. if it is still single hard-linked, we will try again * later. * continue the operation if another error doesn't happen. */ *done = 0; if (au_opt_test(fhsmd.optflags, VERBOSE)) AuLogInfo("%s, %m", fname.name); err = fstat(fd, &st); if (err) AuLogErr("%s", fname.name); if (st.st_nlink == 1) err = au_fname_failed(&fname, failfd); if (!err) { err = close(fd); if (err) AuLogErr("%s", fname.name); } else { e = errno; if (close(fd)) AuLogErr("%s", fname.name); errno = e; } break; case EINVAL: /* * continue the operation if another error doesn't happen. */ *done = 0; if (au_opt_test(fhsmd.optflags, VERBOSE)) { char *s = "??"; if (0 <= mvdown.au_errno && mvdown.au_errno < EAU_Last) s = (char *)au_errlist[mvdown.au_errno]; AuLogInfo("%s, %s", fname.name, s); } err = close(fd); if (err) AuLogErr("%s", fname.name); break; case ENOSPC: /* * the target branch is full. stop moving-down. 
*/ /*FALLTHROUGH*/ default: /* unknown reason */ *done = 0; AuLogErr("%s", fname.name); e = errno; if (close(fd)) AuLogErr("%s", fname.name); errno = e; } out: AuDbgFhsmLog("err %d", err); return err; } static_unless_ut int test_usage(struct aufs_stbr *stbr) { float block, inode; struct aufhsm_wmark *wm; AuDbgFhsmLog("%llu/%llu, %llu/%llu, %d", (unsigned long long)stbr->stfs.f_bavail, (unsigned long long)stbr->stfs.f_blocks, (unsigned long long)stbr->stfs.f_ffree, (unsigned long long)stbr->stfs.f_files, stbr->brid); block = (float)stbr->stfs.f_bavail / stbr->stfs.f_blocks; inode = (float)stbr->stfs.f_ffree / stbr->stfs.f_files; AuDbgFhsmLog("%d, free, block %f, inode %f", stbr->brid, block, inode); wm = au_wm_search_brid(stbr->brid, fhsmd.lcopy); if (wm) /* free ratio */ return block < wm->block[AuFhsm_WM_UPPER] || (wm->inode[AuFhsm_WM_UPPER] < 1 && inode < wm->inode[AuFhsm_WM_UPPER]); return 0; } static int stbr_compar(const void *_brid, const void *_stbr) { int brid = (long)_brid; const struct aufs_stbr *stbr = _stbr; return brid - stbr->brid; } static struct aufs_stbr *au_stbr_bsearch(int brid, struct aufs_stbr *stbr, int nstbr) { long l = brid; return bsearch((void *)l, stbr, nstbr, sizeof(*stbr), stbr_compar); } /* * return tri-state. * plus: the file was skipped, the caller should proceed the list. * zero: the file was moved-down and the list is completed. * minus: error. 
*/ static_unless_ut int do_mvdown(struct aufs_stbr *cur, struct aufs_stbr **next) { int err, done; au_fname_one(listp, listsz, &fname); mvdown.flags = AUFS_MVDOWN_FHSM_LOWER | AUFS_MVDOWN_OWLOWER | AUFS_MVDOWN_STFS; mvdown.stbr[AUFS_MVDOWN_UPPER].brid = cur->brid; err = do_mvdown1(&done); if (err) goto out; err = ftruncate(listfd, listsz - fname.len); if (err) { AuLogErr("list-file after %s", fname.name); goto out; } *next = NULL; listsz -= fname.len; if (listsz && !done) { err = 1; goto out; } if (mvdown.flags & AUFS_MVDOWN_STFS_FAILED) { // warning here goto out; } cur->stfs = mvdown.stbr[AUFS_MVDOWN_UPPER].stfs; assert(cur->brid == mvdown.stbr[AUFS_MVDOWN_UPPER].brid); if (!(mvdown.flags & AUFS_MVDOWN_BOTTOM)) { *next = au_stbr_bsearch(mvdown.stbr[AUFS_MVDOWN_LOWER].brid, fhsmd.comm->stbr, fhsmd.comm->nstbr); if (*next) **next = mvdown.stbr[AUFS_MVDOWN_LOWER]; } out: AuDbgFhsmLog("err %d", err); return err; } /* * In move-down, We have to free the several resources with keeping the error * status. By implementing as a child process, we can do it as simple exit(). 
*/ static_unless_ut int mvdown_child(struct aufs_stbr *cur, struct aufs_stbr **next) { int err, brfd; ssize_t ssz; struct stat st; struct aufs_wbr_fd wbrfd; struct signalfd_siginfo ssi; wbrfd.oflags = O_CLOEXEC; wbrfd.brid = cur->brid; brfd = ioctl(fhsmd.fd[AuFd_ROOT], AUFS_CTL_WBR_FD, &wbrfd); if (brfd < 0) { err = brfd; AuLogErr("AUFS_CTL_WBR_FD"); goto out; } err = au_list(brfd, &listfd, &failfd); if (err) goto out; err = fstat(listfd, &st); if (err < 0) { AuLogErr("listfd"); goto out; } AuDbgFhsmLog("st_size %llu", (unsigned long long)st.st_size); if (!st.st_size) goto out; /* nothing to move-down */ listp = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, listfd, 0); if (listp == MAP_FAILED) AuLogFin("mmap"); *next = NULL; listsz = st.st_size; while (fhsmd.comm->msg != AuFhsm_MSG_EXIT) { ssz = read(fhsmd.fd[AuFd_SIGNAL], &ssi, sizeof(ssi)); if (ssz != -1) break; err = do_mvdown(cur, next); AuDbgFhsmLog("err %d, listsz %llu", err, (unsigned long long)listsz); if (err <= 0 //|| !listsz || *next || !test_usage(cur)) break; } out: AuDbgFhsmLog("err %d", err); return err; } int au_mvdown_run(struct aufs_stbr *cur, struct aufs_stbr **next) { int err; pid_t pid; struct in_ope *in_ope; err = 0; *next = NULL; AuLogInfo("brid %d", cur->brid); if (!test_usage(cur)) goto out; if (!au_opt_test(fhsmd.optflags, NODAEMON)) { list_for_each_entry(in_ope, &fhsmd.in_ope, list) { if (in_ope->brid == cur->brid) goto out; } in_ope = malloc(sizeof(*in_ope)); if (!in_ope) { err = -1; AuLogErr("malloc"); } in_ope->brid = cur->brid; in_ope->pid = 0; list_add(&in_ope->list, &fhsmd.in_ope); pid = fork(); if (!pid) { err = mvdown_child(cur, next); AuDbgFhsmLog("err %d", err); exit(err); } else if (pid > 0) { in_ope->pid = pid; } else if (pid < 0) { err = pid; AuLogErr("fork"); } } else err = mvdown_child(cur, next); out: return err; } fhsm/shm.c000066400000000000000000000111011315652647700127600ustar00rootroot00000000000000/* * Copyright (C) 2011-2015 Junjiro R. 
Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * aufs FHSM, functions for POSIX Shared Memory management and Semaphore. * I'll bet that you would be confused by the terms 'fhsm' and 'shm'. */ #include #include #include #include #include #include #include #include "comm.h" #include "log.h" /* genarate an unique name for a mounted aufs */ int au_shm_name(int rootfd, char name[], int sz) { int err; struct stat st; err = fstat(rootfd, &st); if (err) { AuLogErr("fstat"); goto out; } /* * statfs.f_fsid may not be maintained. * use device id instead. 
*/ err = snprintf(name, sz, "/%s-%04x%04x-%llu", program_invocation_short_name, major(st.st_dev), minor(st.st_dev), (unsigned long long)st.st_ino); if (0 < err && err < sz) { err = 0; goto out; } err = -1; errno = E2BIG; AuLogErr("internal error, snprintf %d, %d", err, sz); out: return err; } /* dir part of POSIX shm */ char *au_shm_dir(int fd) { char *dir, *p, fdpath[32]; int err; struct stat st; ssize_t ssz; dir = NULL; err = snprintf(fdpath, sizeof(fdpath), "/proc/self/fd/%d", fd); if (err >= sizeof(fdpath)) { AuLogErr("internal error, %d >= %d", err, (int)sizeof(fdpath)); goto out; } err = fstatat(AT_FDCWD, fdpath, &st, AT_SYMLINK_NOFOLLOW); if (err) { AuLogErr("fstatat"); goto out; } dir = malloc(st.st_size + 1); if (!dir) { AuLogErr("malloc"); goto out; } ssz = readlink(fdpath, dir, st.st_size); if (ssz < 0 || ssz > st.st_size) { AuLogErr("readlink"); goto out; } p = memrchr(dir, '/', ssz); if (!p) { AuLogErr("memrchr"); goto out; } if (p != dir) *p = '\0'; out: return dir; } /* ---------------------------------------------------------------------- */ /* * open and lock. * to unlock, just close(2). 
*/ static_unless_ut int au_shm_open(char *name, int oflags, mode_t mode) { int fd, err, e; struct statfs stfs; struct flock fl = { .l_type = F_RDLCK, .l_whence = SEEK_SET, .l_start = 0, .l_len = 0 // the whole file }; fd = shm_open(name, oflags, mode); if (fd < 0) { /* keep this errno */ if (errno != EEXIST) { e = errno; AuLogErr("%s", name); errno = e; } goto out; } if (oflags & (O_WRONLY | O_RDWR)) fl.l_type = F_WRLCK; err = fcntl(fd, F_SETLKW, &fl); if (err) { AuLogErr("F_SETLKW"); goto out_fd; } err = fstatfs(fd, &stfs); if (!err) { if (stfs.f_type == AUFS_SUPER_MAGIC) AuLogWarn1("%s should not be aufs (not an error)", name); goto out; /* success */ } AuLogErr("%s", name); out_fd: e = errno; if (close(fd)) AuLogErr("%s", name); errno = e; fd = -1; out: return fd; } /* create, lock, and mmap */ int au_shm_create(char *name, off_t len, int *rfd, void *_p) { int err, e; void **p = _p; *rfd = au_shm_open(name, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR); err = *rfd; if (*rfd < 0) /* keep this errno */ goto out; err = ftruncate(*rfd, len); if (err) { e = errno; AuLogErr("%s", name); goto out_fd; } /* todo: alignment? 
*/ *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, *rfd, 0); if (*p != MAP_FAILED) goto out; /* success */ err = -1; e = errno; AuLogErr("%s", name); out_fd: if (close(*rfd)) AuLogErr("%s", name); if (shm_unlink(name)) AuLogErr("%s", name); errno = e; out: return err; } /* open, lock, and mmap */ int au_shm_map(char *name, int *rfd, void *_p) { int err; struct stat st; void **p = _p; *rfd = au_shm_open(name, O_RDWR, S_IRUSR | S_IWUSR); err = *rfd; if (*rfd < 0) goto out; err = fstat(*rfd, &st); if (err) { AuLogErr("%s", name); goto out_fd; } *p = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, *rfd, 0); if (*p != MAP_FAILED) goto out; /* success */ err = -1; AuLogErr("%s", name); out_fd: if (close(*rfd)) AuLogErr("%s", name); out: return err; } libau/000077500000000000000000000000001315652647700121725ustar00rootroot00000000000000libau/Makefile000066400000000000000000000057361315652647700136450ustar00rootroot00000000000000# Copyright (C) 2005-2017 Junjiro R. Okajima # # This program, aufs is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA LibSoMajor = 2 LibSoMinor = 10 LibSo = libau.so LibSoObj = libau.o \ rdu_lib.o rdu.o \ pathconf.o LibSoHdr = libau.h rdu.h LibSoExport = $(addsuffix .exp, $(basename ${LibSoObj})) LibSoExportErr = $(addsuffix .err, ${LibSoExport}) LibSoVerScript = verscript STRIP ?= strip all: ${LibSo} ifeq (${Glibc},yes) LibSoObj += rdu64.o # this is unnecessary on 64bit system? rdu64.c: rdu.c ln -sf $< $@ rdu64.o: override CPPFLAGS += -DRdu64 .INTERMEDIATE: rdu64.c else CPPFLAGS += -I../${ExtlibPath} LibSoObj += error_at_line.o LibSoHdr += ../${ExtlibPath}/error_at_line.h error_at_line.c: %: ../${ExtlibPath}/% ln -sf $< $@ .INTERMEDIATE: error_at_line.c error_at_line.h endif ${LibSoObj}: override CPPFLAGS += -DNDEBUG -D_REENTRANT -I. ${LibSoObj}: override CFLAGS += -fPIC ${LibSoObj}: %.o: %.c ${LibSoHdr} ${LibSoExport}: %.exp: %.o readelf -p EXP $< 2> $@.err |\ fgrep ']' |\ cut -f2 -d']' > $@ fgrep -v 'it does not exist' $@.err || true ${LibSoVerScript}: ${LibSoExport} { \ echo '{ global:'; \ sort $^ |\ uniq |\ paste -d';' - /dev/null; \ echo 'local: *; };'; \ } > $@ ${RM} ${LibSoExportErr} .INTERMEDIATE: ${LibSoExport} ${LibSoExportErr} ${LibSoVerScript} ${LibSo}: ${LibSo}.${LibSoMajor} ln -sf $< $@ ${LibSo}.${LibSoMajor}: ${LibSo}.${LibSoMajor}.${LibSoMinor} ln -sf $< $@ ${LibSo}.${LibSoMajor}.${LibSoMinor}: override LDFLAGS += -s ${LibSo}.${LibSoMajor}.${LibSoMinor}: override LDLIBS += -ldl -lpthread ${LibSo}.${LibSoMajor}.${LibSoMinor}:| ${LibSoVerScript} ${LibSo}.${LibSoMajor}.${LibSoMinor}: ${LibSoObj} ${CC} --shared -Wl,-soname,${LibSo}.${LibSoMajor} \ -Wl,--version-script,${LibSoVerScript} \ ${LDFLAGS} -o $@ $^ ${LDLIBS} ${STRIP} -R EXP $@ # readelf --syms --use-dynamic libau.so install_ulib: File = ${LibSo}.${LibSoMajor}.${LibSoMinor} install_ulib: Tgt = 
${DESTDIR}${LibAuDir} install_ulib: ${File} $(warning LibAuDir=${LibAuDir}) ${INSTALL} -dv ${Tgt} ${Install} -m 644 ${File} ${Tgt} # -m 755 ln -sf ${File} ${Tgt}/${LibSo}.${LibSoMajor} ln -sf ${LibSo}.${LibSoMajor} ${Tgt}/${LibSo} install: install_ulib clean: ${RM} ${LibSo} ${LibSo}.* ${LibSoObj} *~ ${RM} ${LibSoExport} ${LibSoExportErr} ${LibSoVerScript} for i in rdu64.c error_at_line.[ch]; \ do test -L $${i} && ${RM} $${i} || :; \ done -include priv.mk libau/libau.c000066400000000000000000000030151315652647700134310ustar00rootroot00000000000000/* * Copyright (C) 2009-2015 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include "libau.h" int libau_dl(void **real, char *sym) { char *p; if (*real) return 0; dlerror(); /* clear */ *real = dlsym(RTLD_NEXT, sym); p = dlerror(); if (p) fprintf(stderr, "%s\n", p); return !!p; } /* always retrieve the var from environment, since it can be changed anytime */ int libau_test_func(char *sym) { int ret, l; char *e; ret = 0; e = getenv(LibAuEnv); if (!e) goto out; DPri("e 0x%02x, %s\n", *e, e); ret = !*e || !strcasecmp(e, "all"); if (ret) goto out; l = strlen(sym); while (!ret && (e = strstr(e, sym))) { DPri("%s, l %d, %c\n", e, l, e[l]); ret = (!e[l] || e[l] == ':'); e++; } out: DPri("%s %d\n", sym, ret); return ret; } libau/libau.h000066400000000000000000000032371315652647700134440ustar00rootroot00000000000000/* * Copyright (C) 2009-2017 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
#ifndef __libau_h__
#define __libau_h__

/* NOTE(review): the header names of the bare #include lines are missing in this copy */
#include
#ifdef __GNU_LIBRARY__
#include
#else
#include "error_at_line.h"
#endif

/* resolve the next definition of sym via dlsym(3) and cache it in *real */
int libau_dl(void **real, char *sym);
/* non-zero when the hook named sym is enabled by the LibAuEnv variable */
int libau_test_func(char *sym);

/* name of the environment variable which selects the hooked functions */
#define LibAuEnv "LIBAU"

/*
 * Define libau_dl_<sym>(), which fills the file-local pointer real_<sym>
 * through libau_dl(), and emit the symbol name as a string into the "EXP"
 * section. The user of this macro has to declare real_<sym> beforehand.
 */
#define LibAuDlFunc(sym) \
static inline int libau_dl_##sym(void) \
{ \
	return libau_dl((void *)&real_##sym, #sym); \
} \
/* EXP section will be removed from the shared object */ \
static char hooked_##sym[] __attribute__ ((section ("EXP"), used)) = #sym

/* two-level stringification, the argument is macro-expanded first */
#define LibAuStr(sym) #sym
#define LibAuStr2(sym) LibAuStr(sym)
#define LibAuTestFunc(sym) libau_test_func(LibAuStr2(sym))

/* ---------------------------------------------------------------------- */

/* debug print to stderr, compiled to a no-op unless LibAuDebug is defined */
/* #define LibAuDebug */
#ifdef LibAuDebug
#define DPri(fmt, ...) fprintf(stderr, "%s:%d: " fmt, \
	__func__, __LINE__, ##__VA_ARGS__)
#else
#define DPri(fmt, ...) do {} while (0)
#endif

#endif /* __libau_h__ */

libau/pathconf.c000066400000000000000000000102371315652647700141430ustar00rootroot00000000000000/* * Copyright (C) 2009-2017 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details.
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include /* or */ #include #include #include #include #include #include #include #include "libau.h" static long (*real_pathconf)(const char *path, int name); LibAuDlFunc(pathconf); static long (*real_fpathconf)(int fd, int name); LibAuDlFunc(fpathconf); static long do_fpathconf(int fd, int name) { long err; int wbr_fd, e; err = -1; wbr_fd = ioctl(fd, AUFS_CTL_WBR_FD, NULL); if (wbr_fd >= 0) { if (!libau_dl_fpathconf()) err = real_fpathconf(wbr_fd, name); e = errno; close(wbr_fd); errno = e; } return err; } static int open_aufs_fd(const char *path, DIR **rdp) { int err, l, e; struct stat base, st; char *parent, *p; *rdp = NULL; err = open(path, O_RDONLY); if (err >= 0) goto out; /* success */ switch (errno) { case EISDIR: *rdp = opendir(path); if (*rdp) { err = dirfd(*rdp); goto out; /* success */ } break; case EACCES: /*FALLTHROUGH*/ case EPERM: /* let's try with the parent dir again */ break; default: /* no way */ goto out; } /* * when open(2) for the specified path failed, * then try opening its ancestor instead in order to get a file * descriptor in aufs. 
*/ err = stat(path, &base); if (err) goto out; parent = malloc(strlen(path) + sizeof("/..")); if (!parent) goto out; l = strlen(path); while (path[l - 1] == '/') l--; memcpy(parent, path, l); parent[l - 1] = 0; while (1) { strcat(parent, "/.."); err = stat(parent, &st); if (err) break; err = -1; errno = ENOTSUP; if (st.st_dev != base.st_dev) { error_at_line(0, errno, __FILE__, __LINE__, "cannot handle %s\n", path); break; } *rdp = opendir(parent); if (*rdp) { err = dirfd(*rdp); break; /* success */ } p = realloc(parent, strlen(parent) + sizeof("/..")); if (p) parent = p; else break; } e = errno; free(parent); errno = e; out: return err; } static long libau_pathconf(const char *path, int name) { long err; struct statfs stfs; int fd, e; DIR *dp; err = statfs(path, &stfs); if (err) goto out; err = -1; if (stfs.f_type == AUFS_SUPER_MAGIC) { fd = open_aufs_fd(path, &dp); if (fd >= 0) { err = do_fpathconf(fd, name); e = errno; if (!dp) close(fd); /* ignore */ else closedir(dp); /* ignore */ errno = e; } } else if (!libau_dl_pathconf()) err = real_pathconf(path, name); out: return err; } long pathconf(const char *path, int name) { long ret; ret = -1; if (name == _PC_LINK_MAX && (LibAuTestFunc(pathconf) || LibAuTestFunc(fpathconf))) ret = libau_pathconf(path, name); else if (!libau_dl_pathconf()) ret = real_pathconf(path, name); return ret; } /* ---------------------------------------------------------------------- */ static long libau_fpathconf(int fd, int name) { long err; struct statfs stfs; err = fstatfs(fd, &stfs); if (err) goto out; err = -1; if (stfs.f_type == AUFS_SUPER_MAGIC) err = do_fpathconf(fd, name); else if (!libau_dl_fpathconf()) err = real_fpathconf(fd, name); out: return err; } long fpathconf(int fd, int name) { long ret; ret = -1; if (name == _PC_LINK_MAX && (LibAuTestFunc(pathconf) || LibAuTestFunc(fpathconf))) ret = libau_fpathconf(fd, name); else if (!libau_dl_fpathconf()) ret = real_fpathconf(fd, name); return ret; } 
libau/rdu.c000066400000000000000000000060471315652647700131370ustar00rootroot00000000000000/* * Copyright (C) 2009-2015 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
#include /* or */
#include
#include
#include
#include "rdu.h"

/*
 * Fill @de from the @pos-th entry of the cached array p->pos.
 * Returns 0 on success, -1 when @pos is not below p->npos.
 */
static int rdu_pos(struct Rdu_DIRENT *de, struct rdu *p, long pos)
{
	int err;
	struct au_rdu_ent *ent;

	err = -1;
	if (pos < p->npos) {
		ent = p->pos[pos];
		de->d_ino = ent->ino;
		de->d_off = pos;
		de->d_reclen = au_rdu_len(ent->nlen);
		de->d_type = ent->type;
		strcpy(de->d_name, ent->name);
		err = 0;
	}
	return err;
}

/*
 * The common body of the hooked readdir and readdir_r.
 * @de is the buffer given by the caller of readdir_r, or NULL for readdir
 * (then the per-directory buffer p->de is used). The found entry is returned
 * in *@rde, which stays NULL at the end of the directory.
 * For a directory outside aufs (by fstatfs(2) f_type) the real function is
 * called instead.
 */
static int rdu_readdir(DIR *dir, struct Rdu_DIRENT *de, struct Rdu_DIRENT **rde)
{
	int err, fd;
	struct rdu *p;
	long pos;
	struct statfs stfs;

	if (rde)
		*rde = NULL;

	errno = EBADF;
	fd = dirfd(dir);
	err = fd;
	if (fd < 0)
		goto out;

	err = fstatfs(fd, &stfs);
	if (err)
		goto out;

	errno = 0;
	if (stfs.f_type == AUFS_SUPER_MAGIC) {
		err = rdu_lib_init();
		if (err)
			goto out;

		/* NOTE(review): err is still 0 here, a failed lookup looks like the end of dir */
		p = rdu_buf_lock(fd);
		if (!p)
			goto out;

		/* the position in the stream is used as the index into the cache */
		pos = telldir(dir);
		if (!pos || !p->npos) {
			/* (re)read all entries at the beginning or when nothing is cached */
			err = rdu_init(p, /*want_de*/!de);
			if (err) {
				int e = errno;
				rdu_free(p);
				errno = e;
				goto out;
			}
		}

		if (!de) {
			de = p->de;
			if (!de) {
				rdu_unlock(p);
				errno = EINVAL;
				err = -1;
				goto out;
			}
		}
		/* a position beyond the cache is reported as success with *rde NULL */
		err = rdu_pos(de, p, pos);
		if (!err)
			*rde = de;
		else
			err = 0;
		seekdir(dir, pos + 1);
		rdu_unlock(p);
		errno = 0;
	} else if (!de) {
		if (!Rdu_DL_READDIR()) {
			err = 0;
			*rde = Rdu_REAL_READDIR(dir);
			if (!*rde)
				err = -1;
		}
	} else {
		if (!Rdu_DL_READDIR_R())
			err = Rdu_REAL_READDIR_R(dir, de, rde);
	}

out:
	/* follow the behaviour of glibc */
	if (err && errno == ENOENT)
		errno = 0;
	return err;
}

/* the real readdir, resolved on demand by Rdu_DL_READDIR() */
struct Rdu_DIRENT *(*Rdu_REAL_READDIR)(DIR *dir);

/*
 * Hooked readdir. Uses rdu_readdir() when the hook is enabled, otherwise the
 * real function. Returns NULL when the real function cannot be resolved.
 */
struct Rdu_DIRENT *Rdu_READDIR(DIR *dir)
{
	struct Rdu_DIRENT *de;
	int err __attribute__((unused));

	if (LibAuTestFunc(Rdu_READDIR)) {
		err = rdu_readdir(dir, NULL, &de);
		/* DPri("err %d\n", err); */
	} else if (!Rdu_DL_READDIR())
		de = Rdu_REAL_READDIR(dir);
	else
		de = NULL;
	return de;
}

#ifdef _REENTRANT
/* the real readdir_r, resolved on demand by Rdu_DL_READDIR_R() */
int (*Rdu_REAL_READDIR_R)(DIR *dir, struct Rdu_DIRENT *de, struct Rdu_DIRENT **rde);

/*
 * Hooked readdir_r. Returns errno when the real function cannot be resolved.
 */
int Rdu_READDIR_R(DIR *dir, struct Rdu_DIRENT *de, struct Rdu_DIRENT **rde)
{
	if (LibAuTestFunc(Rdu_READDIR_R))
		return rdu_readdir(dir, de, rde);
	else if (!Rdu_DL_READDIR_R())
		return Rdu_REAL_READDIR_R(dir, de, rde);
	else
		return errno;
}
#endif

libau/rdu.h000066400000000000000000000073301315652647700131400ustar00rootroot00000000000000/* * Copyright (C) 2009-2015 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details.
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
#ifndef __rdu_h__
#define __rdu_h__

/* NOTE(review): the header names of the bare #include lines are missing in this copy */
#ifdef _REENTRANT
#include
#endif
#include
#include
#include
#include "libau.h"

/*
 * The same source is built for the plain and the 64bit dirent interfaces,
 * Rdu64 selects the names.
 */
#ifdef Rdu64
#define Rdu_DIRENT		dirent64
#define Rdu_READDIR		readdir64
#define Rdu_READDIR_R		readdir64_r
#define Rdu_REAL_READDIR	real_readdir64
#define Rdu_REAL_READDIR_R	real_readdir64_r
#define Rdu_DL_READDIR		libau_dl_readdir64
#define Rdu_DL_READDIR_R	libau_dl_readdir64_r
#else
#define Rdu_DIRENT		dirent
#define Rdu_READDIR		readdir
#define Rdu_READDIR_R		readdir_r
#define Rdu_REAL_READDIR	real_readdir
#define Rdu_REAL_READDIR_R	real_readdir_r
#define Rdu_DL_READDIR		libau_dl_readdir
#define Rdu_DL_READDIR_R	libau_dl_readdir_r
#endif

/* ---------------------------------------------------------------------- */

/* the cached entries of one directory, looked up by its file descriptor */
struct rdu {
#ifdef _REENTRANT
	pthread_rwlock_t lock;
#endif

	/* fd is -1 while the slot is unused; shwh is copied from the ioctl result */
	int fd, shwh;
	/* the buffer returned by readdir, allocated on demand */
	struct Rdu_DIRENT *de;

	/* pos[] points to the entries to be returned, npos of them; idx is the fill index */
	unsigned long long npos, idx;
	struct au_rdu_ent **pos;

	/* ent is the raw buffer filled by the ioctl, sz is its size */
	unsigned long long nent, sz;
	union au_rdu_ent_ul ent;

	/* tsearch(3) roots used while merging, for real names and whiteouts */
	struct au_rdu_ent *real, *wh;
};

/* rdu_lib.c */
int rdu_lib_init(void);
struct rdu *rdu_buf_lock(int fd);
int rdu_init(struct rdu *p, int want_de);
void rdu_free(struct rdu *p);

/* ---------------------------------------------------------------------- */

extern struct Rdu_DIRENT *(*Rdu_REAL_READDIR)(DIR *dir);
extern int (*Rdu_REAL_READDIR_R)(DIR *dir, struct Rdu_DIRENT *de, struct Rdu_DIRENT **rde);

/* without _REENTRANT the _r resolver is a constant failure */
#ifdef Rdu64
LibAuDlFunc(readdir64);
#ifdef _REENTRANT
LibAuDlFunc(readdir64_r);
#else
#define libau_dl_readdir64_r()	1
#endif
#else /* Rdu64 */
LibAuDlFunc(readdir);
#ifdef _REENTRANT
LibAuDlFunc(readdir_r);
#else
#define libau_dl_readdir_r()	1
#endif
#endif /* Rdu64 */

/* ---------------------------------------------------------------------- */

#ifdef _REENTRANT
extern pthread_mutex_t rdu_lib_mtx;
#define rdu_lib_lock()		pthread_mutex_lock(&rdu_lib_mtx)
#define rdu_lib_unlock()	pthread_mutex_unlock(&rdu_lib_mtx)
/*
 * NOTE(review): the test has a side effect. pthread_mutex_trylock() takes the
 * mutex when it was free, and the whole call disappears under NDEBUG.
 */
#define rdu_lib_must_lock()	assert(pthread_mutex_trylock(&rdu_lib_mtx))

static inline void rdu_rwlock_init(struct rdu *p)
{
	pthread_rwlock_init(&p->lock, NULL);
}

static inline void rdu_read_lock(struct rdu *p)
{
	rdu_lib_must_lock();
	pthread_rwlock_rdlock(&p->lock);
}

static inline void rdu_write_lock(struct rdu *p)
{
	rdu_lib_must_lock();
	pthread_rwlock_wrlock(&p->lock);
}

static inline void rdu_unlock(struct rdu *p)
{
	pthread_rwlock_unlock(&p->lock);
}

/* not atomic, the lock is released and then taken again for reading */
static inline void rdu_dgrade_lock(struct rdu *p)
{
	rdu_unlock(p);
	rdu_read_lock(p);
}

#else

/* single-threaded build, all the locking turns into nothing */
#define rdu_lib_lock()		do {} while(0)
#define rdu_lib_unlock()	do {} while(0)
#define rdu_lib_must_lock()	do {} while(0)

static inline void rdu_rwlock_init(struct rdu *p)
{
	/* empty */
}

static inline void rdu_read_lock(struct rdu *p)
{
	/* empty */
}

static inline void rdu_write_lock(struct rdu *p)
{
	/* empty */
}

static inline void rdu_unlock(struct rdu *p)
{
	/* empty */
}

static inline void rdu_dgrade_lock(struct rdu *p)
{
	/* empty */
}
#endif /* _REENTRANT */

#endif /* __rdu_h__ */

libau/rdu_lib.c000066400000000000000000000235061315652647700137640ustar00rootroot00000000000000/* * Copyright (C) 2009-2017 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details.
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
#include
#include /* or */
#include
#include
#include
#include
#include
#include
#include "rdu.h"

/* the table of the per-directory caches, rdu_cur slots are used out of rdu_lim */
static struct rdu **rdu;
#define RDU_STEP 8
static int rdu_cur, rdu_lim = RDU_STEP;

/* ---------------------------------------------------------------------- */

/*
 * Issue the AUFS_CTL_RDU ioctl on p->fd with @param, printing @param before
 * and after the call when the debug print is enabled.
 * Returns the result of ioctl(2).
 */
static int rdu_getent(struct rdu *p, struct aufs_rdu *param)
{
	int err;

	DPri("param{%llu, %p, %u | %u | %p, %llu, %u, %d |"
	     " %llu, %d, 0x%x, %u}\n",
	     param->sz, param->ent.e, param->verify[AufsCtlRduV_SZ],
	     param->blk, param->tail.e, param->rent, param->shwh,
	     param->full, param->cookie.h_pos, param->cookie.bindex,
	     param->cookie.flags, param->cookie.generation);

	err = ioctl(p->fd, AUFS_CTL_RDU, param);

	DPri("param{%llu, %p, %u | %u | %p, %llu, %u, %d |"
	     " %llu, %d, 0x%x, %u}\n",
	     param->sz, param->ent.e, param->verify[AufsCtlRduV_SZ],
	     param->blk, param->tail.e, param->rent, param->shwh,
	     param->full, param->cookie.h_pos, param->cookie.bindex,
	     param->cookie.flags, param->cookie.generation);

	return err;
}

/* ---------------------------------------------------------------------- */

#ifdef _REENTRANT
/* taken through rdu_lib_lock() around the accesses to the table above */
pthread_mutex_t rdu_lib_mtx = PTHREAD_MUTEX_INITIALIZER;
#endif

/*
 * initialize this library, particularly global variables.
*/ int rdu_lib_init(void) { int err; err = 0; if (rdu) goto out; rdu_lib_lock(); if (!rdu) { rdu = calloc(rdu_lim, sizeof(*rdu)); err = !rdu; } rdu_lib_unlock(); out: return err; } static int rdu_append(struct rdu *p) { int err, i; void *t; rdu_lib_must_lock(); err = 0; if (rdu_cur <= rdu_lim - 1) rdu[rdu_cur++] = p; else { t = realloc(rdu, (rdu_lim + RDU_STEP) * sizeof(*rdu)); if (t) { rdu = t; rdu_lim += RDU_STEP; rdu[rdu_cur++] = p; for (i = 0; i < RDU_STEP - 1; i++) rdu[rdu_cur + i] = NULL; } else err = -1; } return err; } /* ---------------------------------------------------------------------- */ static struct rdu *rdu_new(int fd) { struct rdu *p; int err; p = malloc(sizeof(*p)); if (p) { rdu_rwlock_init(p); p->fd = fd; p->de = NULL; p->pos = NULL; p->sz = BUFSIZ; p->ent.e = NULL; err = rdu_append(p); if (!err) goto out; /* success */ } free(p); p = NULL; out: return p; } struct rdu *rdu_buf_lock(int fd) { struct rdu *p; int i; assert(fd >= 0); p = NULL; rdu_lib_lock(); if (!rdu) goto out; for (i = 0; i < rdu_cur; i++) if (rdu[i] && rdu[i]->fd == fd) { p = rdu[i]; goto out; } for (i = 0; i < rdu_cur; i++) if (rdu[i] && rdu[i]->fd == -1) { p = rdu[i]; p->fd = fd; goto out; } if (!p) p = rdu_new(fd); out: rdu_lib_unlock(); if (p) { rdu_write_lock(p); if (p->fd < 0) { rdu_unlock(p); p = NULL; } } return p; } void rdu_free(struct rdu *p) { assert(p); p->fd = -1; free(p->pos); free(p->ent.e); free(p->de); p->de = NULL; p->pos = NULL; p->ent.e = NULL; rdu_unlock(p); } /* ---------------------------------------------------------------------- */ /* the heart of this library */ static int do_store; /* a dirty interface of tsearch(3) */ static void rdu_store(struct rdu *p, struct au_rdu_ent *ent) { /* DPri("%s\n", ent->name); */ p->pos[p->idx++] = ent; } static int rdu_ent_compar(const void *_a, const void *_b) { int ret; const struct au_rdu_ent *a = _a, *b = _b; ret = strcmp(a->name, b->name); do_store = !!ret; /* DPri("%s, %s, %d\n", a->name, b->name, ret); */ return 
ret; } static int rdu_ent_compar_wh1(const void *_a, const void *_b) { int ret; const struct au_rdu_ent *a = _a, *b = _b; if (a->nlen <= AUFS_WH_PFX_LEN || memcmp(a->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) ret = strcmp(a->name, b->name + AUFS_WH_PFX_LEN); else ret = strcmp(a->name + AUFS_WH_PFX_LEN, b->name); /* DPri("%s, %s, %d\n", a->name, b->name, ret); */ return ret; } static int rdu_ent_compar_wh2(const void *_a, const void *_b) { int ret; const struct au_rdu_ent *a = _a, *b = _b; ret = strcmp(a->name + AUFS_WH_PFX_LEN, b->name + AUFS_WH_PFX_LEN); do_store = !!ret; /* DPri("%s, %s, %d\n", a->name, b->name, ret); */ return ret; } static int rdu_ent_append(struct rdu *p, struct au_rdu_ent *ent) { int err; err = 0; if (tfind(ent, (void *)&p->wh, rdu_ent_compar_wh1)) goto out; if (tsearch(ent, (void *)&p->real, rdu_ent_compar)) { if (do_store) rdu_store(p, ent); } else err = -1; out: return err; } static int rdu_ent_append_wh(struct rdu *p, struct au_rdu_ent *ent) { int err; err = 0; ent->wh = 1; if (tsearch(ent, (void *)&p->wh, rdu_ent_compar_wh2)) { if (p->shwh && do_store) rdu_store(p, ent); } else err = -1; return err; } static void rdu_tfree(void *node) { /* empty */ } static int rdu_merge(struct rdu *p) { int err; unsigned long long ul; union au_rdu_ent_ul u; void *t; err = -1; #if 0 u = p->ent; for (ul = 0; ul < p->npos; ul++) { DPri("%p, %.*s\n", u.e, u.e->nlen, u.e->name); u.ul += au_rdu_len(u.e->nlen); } #endif p->pos = realloc(p->pos, sizeof(*p->pos) * p->npos); if (!p->pos) goto out; err = 0; p->idx = 0; p->real = NULL; p->wh = NULL; u = p->ent; for (ul = 0; !err && ul < p->npos; ul++) { /* DPri("%s\n", u.e->name); */ u.e->wh = 0; do_store = 1; if (u.e->nlen <= AUFS_WH_PFX_LEN || memcmp(u.e->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) err = rdu_ent_append(p, u.e); else err = rdu_ent_append_wh(p, u.e); u.ul += au_rdu_len(u.e->nlen); } tdestroy(p->real, rdu_tfree); tdestroy(p->wh, rdu_tfree); if (err) { free(p->pos); p->pos = NULL; goto out; } else if (p->idx == 
p->npos) goto out; /* success */ p->npos = p->idx; /* t == NULL is not an error */ t = realloc(p->pos, sizeof(*p->pos) * p->idx); if (t) p->pos = t; u = p->ent; for (ul = 0; ul < p->npos; ul++) { if (p->pos[ul] != u.e) break; u.ul += au_rdu_len(u.e->nlen); } for (; ul < p->npos; ul++) { memmove(u.e, p->pos[ul], au_rdu_len(p->pos[ul]->nlen)); p->pos[ul] = u.e; u.ul += au_rdu_len(u.e->nlen); } out: return err; } int rdu_init(struct rdu *p, int want_de) { int err; unsigned long long used; struct aufs_rdu param; char *t; struct au_rdu_ent *e; memset(¶m, 0, sizeof(param)); param.verify[AufsCtlRduV_SZ] = sizeof(param); param.sz = p->sz; param.ent = p->ent; param.tail = param.ent; if (!param.ent.e) { err = -1; param.ent.e = malloc(param.sz); if (!param.ent.e) goto out; p->ent = param.ent; } t = getenv("AUFS_RDU_BLK"); if (t) param.blk = strtoul(t + sizeof("AUFS_RDU_BLK"), NULL, 0); p->npos = 0; while (1) { param.full = 0; err = rdu_getent(p, ¶m); if (err || !param.rent) break; p->npos += param.rent; if (!param.full) continue; assert(param.blk); e = realloc(p->ent.e, p->sz + param.blk); if (e) { used = param.tail.ul - param.ent.ul; DPri("used %llu\n", used); param.sz += param.blk - used; DPri("sz %llu\n", param.sz); used += param.ent.ul - p->ent.ul; DPri("used %lu\n", used); p->ent.e = e; param.ent.ul = p->ent.ul + used; DPri("ent %p\n", param.ent.e); param.tail = param.ent; p->sz += param.blk; DPri("sz %llu\n", p->sz); } else { err = -1; break; } } p->shwh = param.shwh; if (!err) err = rdu_merge(p); if (!err) { param.ent = p->ent; param.nent = p->npos; err = ioctl(p->fd, AUFS_CTL_RDU_INO, ¶m); } if (!err && want_de && !p->de) { err = -1; p->de = malloc(sizeof(*p->de)); if (p->de) err = 0; } if (err) { free(p->ent.e); p->ent.e = NULL; #if 0 } else { unsigned long long ull; struct au_rdu_ent *e; for (ull = 0; ull < p->npos; ull++) { e = p->pos[ull]; DPri("%p, %.*s\n", e, e->nlen, e->name); } #endif } out: return err; } /* 
---------------------------------------------------------------------- */ static int (*real_closedir)(DIR *dir); LibAuDlFunc(closedir); int closedir(DIR *dir) { int err, fd; struct statfs stfs; struct rdu *p; err = -1; if (LibAuTestFunc(Rdu_READDIR) || LibAuTestFunc(Rdu_READDIR_R) || LibAuTestFunc(closedir)) { errno = EBADF; fd = dirfd(dir); if (fd < 0) goto out; errno = 0; err = fstatfs(fd, &stfs); if (err) goto out; if (stfs.f_type == AUFS_SUPER_MAGIC) { p = rdu_buf_lock(fd); if (p) rdu_free(p); } } if (!libau_dl_closedir()) err = real_closedir(dir); out: return err; } #if 0 extern int scandir (__const char *__restrict __dir, struct dirent ***__restrict __namelist, int (*__selector) (__const struct dirent *), int (*__cmp) (__const void *, __const void *)) __nonnull ((1, 2)); extern int scandir64 (__const char *__restrict __dir, struct dirent64 ***__restrict __namelist, int (*__selector) (__const struct dirent64 *), int (*__cmp) (__const void *, __const void *)) __nonnull ((1, 2)); extern __ssize_t getdirentries (int __fd, char *__restrict __buf, size_t __nbytes, __off_t *__restrict __basep) __THROW __nonnull ((2, 4)); extern __ssize_t getdirentries64 (int __fd, char *__restrict __buf, size_t __nbytes, __off64_t *__restrict __basep) __THROW __nonnull ((2, 4)); #endif mng_fhsm.c000066400000000000000000000035601315652647700130440ustar00rootroot00000000000000/* * Copyright (C) 2011-2015 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * aufs FHSM, the management by mount/umount helpers */ #include #include #include #include #include #include #include "au_util.h" void mng_fhsm(char *cwd, int unmount) { int err, nbr, nfhsm, status; union aufs_brinfo *brinfo; char *opt; pid_t pid, waited; opt = "--kill"; err = au_br(&brinfo, &nbr, cwd); if (err) perror("au_br"); nfhsm = au_nfhsm(nbr, brinfo); free(brinfo); if (!unmount) { if (nfhsm >= 2) opt = "--quiet"; } else if (nfhsm < 2) return; pid = fork(); if (!pid) { char *av[] = {basename(AUFHSM_CMD), opt, cwd, NULL}; #if 0 int i; for (i = 0; av[i] && i < 4; i++) puts(av[i]); //return; #endif execve(AUFHSM_CMD, av, environ); AuFin(__func__); } else if (pid > 0) { waited = waitpid(pid, &status, 0); if (waited == pid) { err = WEXITSTATUS(status); /* err = !WIFEXITED(status); */ /* error msgs should be printed by the controller */ } else { /* should not happen */ err = -1; AuFin("waitpid"); } } else if (!unmount) AuFin(__func__); else perror(__func__); } mount.aufs.c000066400000000000000000000162301315652647700133430ustar00rootroot00000000000000/* * Copyright (C) 2005-2017 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * The main purpose of this script is updating /etc/mtab and calling auplilnk. * This behaviour is highly depending on mount(8) in util-linux package. */ #define _XOPEN_SOURCE 500 /* getsubopt */ #define _BSD_SOURCE /* dirfd */ #include #include #include #include #include #include #include #include #include #include #include "au_util.h" #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) #endif #define DROPLVL_STR(lvl) \ { \ .set = DROPLVL ## lvl, \ .clr = DROPLVL ## lvl ## R \ } static struct { int val; char *arg; struct { char *set, *clr; } str[3]; } droplvl = { /* .val = DROPLVL_INVALID, */ .str = { DROPLVL_STR(1), DROPLVL_STR(2), DROPLVL_STR(3) } }; #define DROPLVL_INVALID ARRAY_SIZE(droplvl.str) enum { Remount, Bind, Drop, Fake, Update, Verbose, AuFlush, LastOpt }; static void test_opts(char opts[], unsigned char flags[]) { int c; long l; char *p, *last, *o, *val, *pat[] = { [Remount] = "remount", [Bind] = "bind", [Drop] = DROPLVL, NULL }; o = strdup(opts); if (!o) AuFin("stdup"); droplvl.arg = NULL; droplvl.val = DROPLVL_INVALID; p = o; while (*p) { last = opts + (p - o); c = getsubopt(&p, pat, &val); switch (c) { case Remount: flags[Remount] = 1; break; case Bind: flags[Bind] = 1; break; case Drop: flags[Drop] = 1; droplvl.arg = last; errno = 0; l = strtol(val, NULL, 0);; if (errno || !l || DROPLVL_INVALID < abs(l)) { errno = EINVAL; AuFin("invalid value %ld, %s", l, droplvl.arg); } droplvl.val = l; break; } } free(o); } static int test_flush(char opts[]) { int err, i; regex_t preg; char *p, *o; const char *pat = "^((add|ins|append|prepend|del)[:=]" "|(mod|imod)[:=][^,]*=ro" "|(noplink|ro)$)"; o = strdup(opts); if (!o) AuFin("stdup"); p = o; i = 1; while ((p = strchr(p, ','))) { i++; *p++ = 0; } /* todo: try getsubopt(3)? 
*/ err = regcomp(&preg, pat, REG_EXTENDED | REG_NOSUB); if (err) { size_t sz; char a[128]; sz = regerror(err, &preg, a, sizeof(a)); AuFin("regcomp: %.*s", (int)sz, a); } p = o; while (i--) { if (!regexec(&preg, p, 0, NULL, 0)) { err = 1; break; } else p += strlen(p) + 1; } regfree(&preg); free(o); return err; } static int drop_level(int argc, char **argv, int idx) { int i, lvl, neg; size_t l, t; char *src, *o, *p, *str; src = argv[idx]; l = strlen(src) + 1; if (droplvl.arg < src || src + l < droplvl.arg) { errno = EINVAL; AuFin("internal error, src %p, l %zu, droplvl %p", src, l, droplvl.arg); } l -= sizeof(DROPLVL) - 1 + 2; /* "=N" */ o = malloc(l); t = droplvl.arg - src; memcpy(o, src, t); p = o + t; *p = '\0'; lvl = abs(droplvl.val); neg = 0; if (droplvl.val < 0) neg = 1; for (i = 0; i < lvl; i++) { str = droplvl.str[i].set; if (neg) str = droplvl.str[i].clr; /* with comma or terminating NULL */ l += strlen(str) + 1; p = realloc(o, l); if (!p) AuFin("realloc"); o = p; strcat(o, str); if (i + 1 < lvl) strcat(o, ","); } p = strchr(droplvl.arg, ','); if (p) strcat(o, p); Dpri("o %s\n", o); argv[idx] = o; return 0; } static void do_mount(char *dev, char *mntpnt, int argc, char *argv[], unsigned char flags[]) { int i; const int ac = argc + 7; char *av[ac], **a; /* todo: eliminate the duplicated options */ a = av; *a++ = "mount"; *a++ = "-i"; if (flags[Fake]) *a++ = "-f"; if (!flags[Bind] || !flags[Update]) *a++ = "-n"; if (flags[Bind] && flags[Verbose]) *a++ = "-v"; *a++ = "-t"; *a++ = AUFS_NAME; for (i = 3; i < argc; i++) if (strcmp(argv[i], "-f") && strcmp(argv[i], "-n") && strcmp(argv[i], "-v")) *a++ = argv[i]; *a++ = dev; *a++ = mntpnt; *a = NULL; i = a - av; if (i > ac) AuFin("internal error, %d > %d\n", i, ac); #ifdef DEBUG for (i = 0; av[i] && i < ac; i++) puts(av[i]); exit(0); #endif execv(MOUNT_CMD, av); AuFin("mount"); } /* ---------------------------------------------------------------------- */ int main(int argc, char *argv[]) { int err, c, status, fd, 
opts_idx; pid_t pid; unsigned char flags[LastOpt]; struct mntent ent; char *dev, *mntpnt, *opts, *cwd; DIR *cur; for (c = 0; c < argc; c++) Dpri("%s\n", argv[c]); if (argc < 3) { puts(AuVersion); errno = EINVAL; AuFin(NULL); } memset(flags, 0, sizeof(flags)); flags[Update] = 1; opts = NULL; opts_idx = -1; /* mount(8) always passes the arguments in this order */ dev = argv[1]; mntpnt = argv[2]; while ((c = getopt(argc - 2, argv + 2, "fnvo:")) != -1) { switch (c) { case 'f': flags[Fake] = 1; break; case 'n': flags[Update] = 0; break; case 'v': flags[Verbose] = 1; break; case 'o': if (!opts) { opts = optarg; opts_idx = optind + 1; break; } /*FALLTHROUGH*/ case '?': case ':': errno = EINVAL; AuFin("internal error"); } } cur = opendir("."); if (!cur) AuFin("."); err = chdir(mntpnt); if (err) AuFin("%s", mntpnt); cwd = getcwd(NULL, 0); /* glibc */ if (!cwd) AuFin("getcwd"); err = fchdir(dirfd(cur)); if (err) AuFin("fchdir"); closedir(cur); /* ignore */ if (opts) test_opts(opts, flags); if (!flags[Bind] && flags[Update]) { err = access(MTab, R_OK | W_OK); if (err) AuFin(MTab); } fd = -1; if (flags[Remount]) { errno = EINVAL; if (flags[Bind]) AuFin("both of remount and bind are specified"); flags[AuFlush] = test_flush(opts); if (flags[AuFlush] /* && !flags[Fake] */) { err = au_plink(cwd, AuPlink_FLUSH, AuPlinkFlag_OPEN | AuPlinkFlag_CLOEXEC, &fd); if (err) AuFin(NULL); } } if (flags[Drop]) err = drop_level(argc, argv, opts_idx); pid = fork(); if (!pid) { /* actual mount operation */ do_mount(dev, mntpnt, argc, argv, flags); return 0; } else if (pid < 0) AuFin("fork"); if (fd >= 0) close(fd); /* ignore */ err = waitpid(pid, &status, 0); if (err < 0) AuFin("child process"); err = !WIFEXITED(status); if (!err) err = WEXITSTATUS(status); mng_fhsm(cwd, /*umount*/0); if (!err && !flags[Bind]) { if (flags[Update]) err = au_update_mtab(cwd, flags[Remount], flags[Verbose]); else if (flags[Verbose]) { /* withoug blocking plink */ err = au_proc_getmntent(cwd, &ent); if (!err) 
au_print_ent(&ent); else AuFin("internal error"); } } return err; } mtab.c000066400000000000000000000113311315652647700121640ustar00rootroot00000000000000/* * Copyright (C) 2005-2017 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* cf. fstab.c or the locking for /etc/mtab in util-linux */ /* * we need to extract a part of util-linux and create a simple and generic * library for locking /etc/mtab. 
*/ /* #include */ #define PROC_SUPER_MAGIC 0x9fa0 #include #include #include #include #include #include #include #include #include #include "au_util.h" void au_print_ent(struct mntent *ent) { printf("%s on %s type %s (%s)\n", ent->mnt_fsname, ent->mnt_dir, ent->mnt_type, ent->mnt_opts); } /* ---------------------------------------------------------------------- */ static void lock_mtab(char *pid_file) { int err, i; for (i = 0; i < 5; i++) { err = link(pid_file, MTab "~"); if (!err) break; sleep(1); } if (err) AuFin(MTab "~"); } static void unlock_mtab(void) { int err; err = rename(MTab "~", MTab); if (err) AuFin(MTab); } static void append_mtab(FILE *fp, FILE *ofp, struct mntent *ent) { int err; struct mntent *p; while ((p = getmntent(ofp))) { err = addmntent(fp, p); if (err) AuFin("addmntent"); } err = addmntent(fp, ent); if (err) AuFin("addmntent"); } /* todo: there are some cases which options are not changed */ static void update_mtab(FILE *fp, char *mntpnt, int do_remount, int do_verbose) { int err; long pos; FILE *ofp; struct mntent ent, *p; err = au_proc_getmntent(mntpnt, &ent); if (err) AuFin("no such mount point"); ofp = setmntent(MTab, "r"); if (!ofp) AuFin(MTab); if (do_remount) { /* find the last one */ pos = -1; while ((p = getmntent(ofp))) { if (!strcmp(p->mnt_dir, mntpnt)) pos = ftell(ofp); } rewind(ofp); if (pos > 0) { while ((p = getmntent(ofp))) { if (ftell(ofp) == pos) { /* replace the line */ p = &ent; pos = -1; } err = addmntent(fp, p); if (err) AuFin("addmntent"); } if (pos > 0) AuFin("internal error"); } else append_mtab(fp, ofp, &ent); } else append_mtab(fp, ofp, &ent); endmntent(ofp); /* ignore */ if (do_verbose) au_print_ent(&ent); } /* ---------------------------------------------------------------------- */ int au_update_mtab(char *mntpnt, int do_remount, int do_verbose) { int err, fd, status, e2; pid_t pid; ino_t ino; dev_t dev; struct stat st; struct statfs stfs; struct flock flock = { .l_type = F_WRLCK, .l_whence = SEEK_SET, .l_start 
= 0, .l_len = 0 }; struct mntent ent; char pid_file[sizeof(MTab "~.") + 20]; FILE *fp; err = statfs(MTab, &stfs); if (stfs.f_type == PROC_SUPER_MAGIC) { if (do_verbose) { err = au_proc_getmntent(mntpnt, &ent); if (err) AuFin("no such mount point"); au_print_ent(&ent); } return 0; } snprintf(pid_file, sizeof(pid_file), MTab "~.%d", getpid()); fd = open(pid_file, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); if (fd < 0) AuFin("%s", pid_file); err = fcntl(fd, F_SETLK, &flock); if (err) AuFin("%s", pid_file); fp = fdopen(fd, "r+"); if (!fp) AuFin("%s", pid_file); pid = fork(); if (!pid) { lock_mtab(pid_file); update_mtab(fp, mntpnt, do_remount, do_verbose); unlock_mtab(); return 0; } else if (pid < 0) AuFin("fork"); err = fstat(fd, &st); if (err) perror(pid_file); ino = st.st_ino; dev = st.st_dev; err = waitpid(pid, &status, 0); if (err < 0) { perror(pid_file); goto out; } err = !WIFEXITED(status); if (!err) err = WEXITSTATUS(status); e2 = unlink(pid_file); if (e2 && errno != ENOENT) perror(pid_file); e2 = stat(MTab "~", &st); if (!e2) { if (st.st_dev == dev && st.st_ino == ino) { /* * The device/inode number is same, * it means it is we who made the file. * If someone else removed our file between stat(2) and * unlink(2), it is a breakage of the rule. */ e2 = unlink(MTab "~"); if (e2) perror(MTab); } } else if (errno != ENOENT) perror(MTab "~"); e2 = fclose(fp); if (e2) perror(MTab); out: return err; } perror.c000066400000000000000000000031301315652647700125500ustar00rootroot00000000000000/* * Copyright (C) 2013-2017 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #ifndef __GNU_LIBRARY__ #include #include #include #endif /* __GNU_LIBRARY__ */ #include #include "au_util.h" int au_errno; const char *au_errlist[EAU_Last] = { [EAU_MVDOWN_OPAQUE] = "Opaque ancestor", [EAU_MVDOWN_WHITEOUT] = "Whiteout-ed by ancestor", [EAU_MVDOWN_UPPER] = "Upper exists", [EAU_MVDOWN_BOTTOM] = "No writable lower", [EAU_MVDOWN_NOUPPER] = "No upper exists", [EAU_MVDOWN_NOLOWERBR] = "No such lower branch" }; void au_perror(const char *s) { const char *colon; if (!s || !*s) s = colon = ""; else colon = ": "; if (!au_errno) perror(s); else if (0 < au_errno && au_errno < EAU_Last) fprintf(stderr, "%s%s%s\n", s, colon, au_errlist[au_errno]); else fprintf(stderr, "%s%sUnknown error %d\n", s, colon, au_errno); fflush(stderr); } plink.c000066400000000000000000000137631315652647700123710ustar00rootroot00000000000000/* * Copyright (C) 2005-2017 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #define _FILE_OFFSET_BITS 64 /* ftw.h */ #define _XOPEN_SOURCE 500 /* ftw.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include "au_util.h" /* todo: try argz? */ static struct name_array { char *o; int bytes; char *cur; int nname; } na; struct ino_array ia; static int na_append(char *plink_dir, char *name) { int l, sz; char *p; const int cur = na.cur - na.o; l = strlen(plink_dir) + strlen(name) + 2; sz = na.bytes + l; p = realloc(na.o, sz); if (!p) AuFin("realloc"); na.o = p; na.bytes = sz; na.cur = p + cur; na.cur += sprintf(na.cur, "%s/%s", plink_dir, name) + 1; na.nname++; return 0; } static int ia_append(ino_t ino) { int sz; char *p; const int cur = ia.p - ia.o; sz = na.bytes + sizeof(ino_t); p = realloc(ia.o, sz); if (!p) AuFin("realloc"); ia.o = p; ia.bytes = sz; ia.p = p + cur; *ia.cur++ = ino; ia.nino++; return 0; } static int build_array(char *plink_dir) { int err; DIR *dp; struct dirent *de; char *p; ino_t ino; err = access(plink_dir, F_OK); if (err) return 0; err = 0; dp = opendir(plink_dir); if (!dp) AuFin("%s", plink_dir); while ((de = readdir(dp))) { if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, "..")) continue; #if 0 if (de->d_type == DT_DIR) { errno = EISDIR; AuFin(de->d_name); } #endif err = na_append(plink_dir, de->d_name); if (err) break; p = strchr(de->d_name, '.'); if (!p) { errno = EINVAL; AuFin("internal error, %s", de->d_name); } *p = 0; errno = 0; ino = strtoull(de->d_name, NULL, 0); if (ino == /*ULLONG_MAX*/-1 && errno == ERANGE) AuFin("internal error, %s", de->d_name); err = ia_append(ino); if (err) break; } closedir(dp); return err; } /* ---------------------------------------------------------------------- */ /* 
---------------------------------------------------------------------- */ static int proc_fd = -1; static void au_plink_maint(char *si, int close_on_exec, int *fd) { int err, oflags; ssize_t ssz; if (si) { if (proc_fd >= 0) { errno = EINVAL; AuFin("proc_fd is not NULL"); } oflags = O_WRONLY; if (close_on_exec) oflags |= O_CLOEXEC; proc_fd = open("/proc/" AUFS_PLINK_MAINT_PATH, oflags); if (proc_fd < 0) AuFin("proc"); ssz = write(proc_fd, si, strlen(si)); if (ssz != strlen(si)) AuFin("write"); } else { err = close(proc_fd); if (err) AuFin("close"); proc_fd = -1; } if (fd) *fd = proc_fd; } void au_clean_plink(void) { ssize_t ssz __attribute__((unused)); ssz = write(proc_fd, "clean", 5); #ifndef DEBUG if (ssz != 5) AuFin("clean"); #endif } static int do_plink(char *cwd, int cmd, int nbr, union aufs_brinfo *brinfo) { int err, i, l, nopenfd; struct rlimit rlim; __nftw_func_t func; char *p; #define OPEN_LIMIT 1024 err = 0; switch (cmd) { case AuPlink_FLUSH: /*FALLTHROUGH*/ case AuPlink_CPUP: func = ftw_cpup; break; case AuPlink_LIST: func = ftw_list; break; default: errno = EINVAL; AuFin(NULL); func = NULL; /* never reach here */ } for (i = 0; i < nbr; i++) { if (!au_br_writable(brinfo[i].perm)) continue; l = strlen(brinfo[i].path); p = malloc(l + sizeof(AUFS_WH_PLINKDIR) + 2); if (!p) AuFin("malloc"); sprintf(p, "%s/%s", brinfo[i].path, AUFS_WH_PLINKDIR); //puts(p); err = build_array(p); if (err) AuFin("build_array"); free(p); } if (!ia.nino) goto out; if (cmd == AuPlink_LIST) { ia.p = ia.o; for (i = 0; i < ia.nino; i++) printf("%llu ", (unsigned long long)*ia.cur++); putchar('\n'); } err = getrlimit(RLIMIT_NOFILE, &rlim); if (err) AuFin("getrlimit"); nopenfd = (int)rlim.rlim_cur; if (rlim.rlim_cur == RLIM_INFINITY || rlim.rlim_cur > OPEN_LIMIT || nopenfd <= 0) nopenfd = OPEN_LIMIT; else if (nopenfd > 20) nopenfd -= 10; au_nftw(cwd, func, nopenfd, FTW_PHYS | FTW_MOUNT | FTW_ACTIONRETVAL); /* ignore */ if (cmd == AuPlink_FLUSH) { au_clean_plink(); na.cur = na.o; for (i = 
0; i < na.nname; i++) { Dpri("%s\n", na.cur); err = unlink(na.cur); if (err) AuFin("%s", na.cur); na.cur += strlen(na.cur) + 1; } } out: free(ia.o); free(na.o); return err; #undef OPEN_LIMIT } int au_plink(char cwd[], int cmd, unsigned int flags, int *fd) { int err, nbr; struct mntent ent; char *p, si[3 + sizeof(unsigned long long) * 2 + 1]; union aufs_brinfo *brinfo; err = au_proc_getmntent(cwd, &ent); if (err) AuFin("no such mount point"); if (hasmntopt(&ent, "noplink")) goto out; /* success */ if (flags & AuPlinkFlag_OPEN) { p = hasmntopt(&ent, "si"); if (!p) { errno = EINVAL; AuFin("no aufs mount point"); } strncpy(si, p, sizeof(si)); p = strchr(si, ','); if (p) *p = 0; au_plink_maint(si, flags & AuPlinkFlag_CLOEXEC, fd); /* someone else may modify while we were sleeping */ err = au_proc_getmntent(cwd, &ent); if (err) AuFin("no such mount point"); } err = au_br(&brinfo, &nbr, cwd); if (err) AuFin(NULL); err = do_plink(cwd, cmd, nbr, brinfo); if (err) AuFin(NULL); free(brinfo); if (flags & AuPlinkFlag_CLOSE) au_plink_maint(NULL, 0, fd); out: return err; } proc_mnt.c000066400000000000000000000057471315652647700131000ustar00rootroot00000000000000/* * Copyright (C) 2005-2017 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include "au_util.h" #define ProcMounts "/proc/self/mounts" static void copy_ent(struct mntent *dst, struct mntent *src) { free(dst->mnt_opts); free(dst->mnt_type); free(dst->mnt_dir); free(dst->mnt_fsname); dst->mnt_dir = NULL; dst->mnt_type = NULL; dst->mnt_opts = NULL; dst->mnt_fsname = strdup(src->mnt_fsname); if (dst->mnt_fsname) dst->mnt_dir = strdup(src->mnt_dir); if (dst->mnt_dir) dst->mnt_type = strdup(src->mnt_type); if (dst->mnt_type) dst->mnt_opts = strdup(src->mnt_opts); if (dst->mnt_opts) { dst->mnt_freq = src->mnt_freq; dst->mnt_passno = src->mnt_passno; } else AuFin("strdup"); } /* * Ideally the mounted aufs should be unmounted even if its mntpnt has very long * pathname. In other words, if umount.aufs cannot handle a long pathname, then * mount.aufs should reject in the beginning. * getmntent(3) in glibc reads 4096 bytes for a single mnt entry. I agree it is * large enough. And mount(8) rejects too long pathname. It is OK too. As long * as 4095 (4096 - 1) bytes pathname succeeds mounting, then it should be * unmounted flawlessly. * Testing on Debian v7 (wheezy) succeeded mounting 4095 bytes pathname, but * failed unmounting. I don't like this unbalancing. So replace getmntent() by * getmntent_r() with larger buffer. Obviously this is less important since such * long pathname is very rare. 
*/ int au_proc_getmntent(char *mntpnt, struct mntent *rent) { int found; struct mntent *p, e; FILE *fp; char a[4096 + 1024], path[PATH_MAX], *decoded; decoded = au_decode_mntpnt(mntpnt, path, sizeof(path)); if (!decoded) AuFin("au_decode_mntpnt"); fp = setmntent(ProcMounts, "r"); if (!fp) AuFin(ProcMounts); /* find the last one */ memset(rent, 0, sizeof(*rent)); found = 0; while ((p = getmntent_r(fp, &e, a, sizeof(a)))) if (!strcmp(p->mnt_dir, decoded)) { Dpri("%s, %s, %s, %s, %d, %d\n", p->mnt_fsname, p->mnt_dir, p->mnt_type, p->mnt_opts, p->mnt_freq, p->mnt_passno); copy_ent(rent, p); found = 1; } endmntent(fp); if (!found) { errno = EINVAL; AuFin("%s, %s", mntpnt, decoded); } return 0; } sample/000077500000000000000000000000001315652647700123575ustar00rootroot00000000000000sample/00index.txt000066400000000000000000000005231315652647700143670ustar00rootroot00000000000000 - brsync rsync between aufs branches - auroot script to build an aufs chroot-ed/jail environment for internet - auware aufs for VMware Appliance - servers aufs for cluster servers - vserver aufs for Linux-VServer - logrow expand the size of the mounted loopback block device - uloop loopback block device in userspace sample/auroot/000077500000000000000000000000001315652647700136705ustar00rootroot00000000000000sample/auroot/README.txt000066400000000000000000000044161315652647700153730ustar00rootroot00000000000000 A sample script to build a chroot-ed/jail environment for internet service. Junjiro R. Okajima o Introduction Some internet services such as HTTP, DNS server, often run in a jail environment which is a separated and chroot-ed directory hierarchy for such service only. The system administrators generally build the directory hierarchy and copy some necessary files into it such like binaries, libraries, devices and configuration files. Thus, it is also often that he forgets updating those files after upgrading the system packages or configurations. This sample script addresses this problem. 
o Using AUFS This sample script can share all of system directory hierarchy with using aufs. It builds a separated directory with a writable empty directory and the system directory which is marked as readonly in this environment. For example, when you mount aufs with "writable empty temporary directory" over "root directory with marked as readonly" then you will get a modifiable new root directory with no harm to the original root directory. All of the modification goes to the new jail environment only. It must be effective and suitable for a chroot-ed environment for any internet services. If you have your /usr or /var in separated disk partitions, you need to mount aufs for each partitions. See the sample script. Note: Generally you don't need to stack over kernel virtual filesystems such as /proc or /sys. To stack over those filesystems has no meaning and aufs doesn't support it. o the sample script The 'auroot' is very easy and simple script, and you can customize it for your purpose. Currently, it mounts 4 aufs under the given directory, which are root, /dev, /var and /usr, since they exist on the separated disk partitions on my test system. And then, modifies etc/default/apache2, invokes chroot and executes the given command. For example, when you execute "sudo auroot /tmp/jail /etc/init.d/apache2 start" the script will mount, /tmp/jail = + / /tmp/jail/dev = + /dev /tmp/jail/var = + /var /tmp/jail/usr = + /usr and execute, "chroot /tmp/jail /etc/init.d/apache2 start" Any modification under /tmp/jail will go to , and the original system files will never be modified. Enjoy! 
sample/auroot/Robert.Wotzlaw.tar.gz000066400000000000000000000214771315652647700177350ustar00rootroot00000000000000‹£Å,Lí=iwãÆ‘þjýжfÏ$$Hê˜ñÈgå9be5ÇêX?¿¬W‰¦ Ò(¶ÿûÖÑ'Ø 9Gœ+<¿±TWWWU×Õ‡ÆY2™•q3­¢8->û—|ßÝí}&†ÿrü§©ê¸â³¼Ém7\^\–E³ø-Hú-Ÿ{_ Æi>ÇÕlkëžxV,nËôrV‹ÏŠáh(Nб,kñ}Qÿ3‹o¶îÔÙ,­Ä¢ŽÄóžðzZJ)ªbZßÄ¥Œü‡¢“8¥LÒª.ÓqSK‘Ö"ΓAQŠy‘¤Ó[|Ñä‰,E=“¢–å¼Å”~ùËësÀò™Ë2ÎÄÛfœ¥qœNd^I%ø¦šÉDŒo©ÁK¤áTÑ ^€7®Ó"ÿÐÈ Jq-Ë Þˆ݉Â×@у¸·ESŠbÍ¥·"‹kÛ.ZâŽÝ/iNhgÅF3„0¾›4ËÄXЦ’Ó&ë€ß}÷æüL¾þA|xrrøú쇯¶žðU^KÆ”ÎY ˆaLeœ×·@8 xõâäÙwÐâðۣ㣳ú—Gg¯_œžŠ—oNÄ¡x{xrvôìüøðD¼=?yûæôE$Ä©D¢$rõõyg§$``"ë8Í*+Ì (Ë1‹¯%u"Ók +PGfxGœù%`- ¿éTäEÝ7e *RËÒ„ÖVž=q”O¢žØPœ_eÀôShþ2ê—YQ”=ñmQÕúêPˆáÎh4ìv‡#!ÎOQãÇžì‰k1ŒF¤ùýáã>Žì?>Øßûãpç`8ôZô>n‹GýÝ!¶Øì<1-fqåò†ÁÍã÷1Úµ}àDœÅù¥¬àGyç‰íŒÂ`îÒ:&k…þ[Ö!º°õ$kI¢ãy“’…÷öXTrR³²ès™IJY¨Y¡5|â€fåe3—y]1ð‰d5 À‚{b2‹8‘j™êÂ|î‹ÃÅBæ‰fŸ$Þа›<žK×ÎøeËÔ"N3Ɉ“Ä2ʼnNº˜æà ²,V£ƒ¹Üé(o›4#Ÿ•E³2¿NË"Ç+«uxþò”B<ÎhàÏŠ¼FŽÀ;øOŒ"àv]‰B ¯v"0!»Z¤%w[°ý›¦eUƒTò:žÔºË `M`u[ÕrNâb ,Šþe¢š(b/oK¹ˆ¹ 1LË®‚eoËJ¡Ä†ûÌ£¬~ ºðæÕ ~uU fEZfÇàbx‰ÿjÀä±-¡˜–Åœ>¯Àò8WDI7"¥‡9üÆòU$Îs¥¦  ÀÚø–?‰Ä³LÆy ¶ A“l]]”·bPÏ1Fâ{4íÀפ@ãÕ¨^ÆvAJ~O–%šR 7¾”Ɔ#–æ¤H¤&¤š”é¢ö|´Ó §æj\P¹´wÒhQ}«°þ*; ®4M<¶<8c ÎpçÈé—0Ä)iÉCöGjË£„ˆ®ÉI÷¬o÷ù8ø | `ÁnZ‹4‰n=Í¢5c€â˜õà&3IöçÌÌ ~ËÐ"ÑÜ´hqöS%LµÄƒ.eœª"Ïnm~j}€w‹RV8ÝÉÒ(6êï©Dqª—e““NñtëB MF/q–Æ éFüzŽ:pŒBb£y cäÊJÙ¥0HàÝÆMáŠf ö$Y ¾š”ŒQš“ÅdA’‰¯”°•„# %ŽË|!­´;ô±ƒMGºT¸:ÔP[2 6aBÞàÜD:c¥åO%ñ<¾TSinõ¢ü®6Œ:@Ô ê˜£·K˜ç¹ L4ë&‡yD)= fZdYqƒÌRÔd¨|C8gáãqš'51ØÅa€UáßY«ÜÏÛ4gå-vØäWyq“ë«"…§°-]1‹1è]Qc09'Ó‚´ô©,I8 v%ˆ(Îãì¶1Ö¬Á&X wÅœDµ™¦—vAaáR›=”1y`Þ^ÇÕ‹xr»"qš€Ÿ:_TÂܹ™™` ‘F`<މӋ’ñ€·:–08J ˆ÷‹,¾µ3Èí|Ò³b¾@S豕ì íŒbþ\J0sL‰±ŒL<–*5›Tû²˜€£à®¾bYP@_Q%aï·¬<÷§ã€»ÖnlÝÄqÕÃUõ>ªæ’¥p•ýè¦>fU„kL‘%f-†÷ÀÇEQÆå- S¤¸&OEjžµÃ hVÌeÛæ¡{«åB¥#8 [å´PV+—ïjÐÍ1öã!…¼UNbœW¨p4Á]Õ'{fÍZ_ÖÍ !Ï›RóÕí‰ä‰?(q¢Eë1ÁxB4‹€w‹´+ß8(ԎΘåë-ÎK4ÅâU‰¿‚EâÍUüS:µåÑÀqƒ"ÐÎÀ{)ˆ¿dÿ”’=—ãèWª+0Jé×E‘U6†x~øh8ê[Ðå7U9¨fÀÍARL€‚1 ´EhŠ”FYÅIS’Rdª4tŒ<€¾œþ,O *’es;NpЕ¶‘]Ϭ®ƒeÕB¬ZËå"p%!EîM V¢Y¦4ˆXË’"‘Vƒ`§f|*Äõ¹ª‡j”ªÅ“‡+çñUõ µˆŠòr5ðådbÚ‹v£½•à÷FâôÕ[ñ=èÍK9;{bôÕÁðñÁÞŽ8?{Fœð„EþáÀ4ç¤i'ÚFÞJ¸÷ú;œ64I ~…T›sñ-äŹì^ÆXUùN‚ùÚ‹FÑþƒÑÃ~)ÁURk‰2Y«žçÆ žçò:HKÕG<É ›«ð1‘׃*‰÷-8Ùì5à# NNi 
¸åób§µÄX¥'¯³Üj>ÖµàO<ðAVL`vƒ;Š5¸Ž×£9¼©Êëõð;>Kǃ4OëAy³œ |X¢SíBß"n«N"à›‹Hêm࣠l„lQŠ ‹:L|[Ô•Gë` ÿAH2Ÿ«²=ørRg> t Èê¦óúbžVÌç÷¶ ‚°»â Í&ýãâ#Y˜´²Ã‘A]Q€ *LvA!XÆÜŠ]åŸÈuàÿM…ž7^ÔNí"Òµ‰:®®”Ï¿¬ÑÇâcHÌóK¡9u”b[Qa9\š é+]Ë!Ä“œ¡—@Xµ)o)SiºnL)1°¿’G‚9–|‡vüj*ó‰Ê¾R¤*Jd²]Ÿì«Ø¾©ïŸ€m*ŸF¢úQ™¶R‰øŠª‘)ià\RŸmš©€@Uæä|¶€mNÄAeMÉȶ­˜nCƒB\Bø4¡+t¨³ηR8 ]•¬áªÔÁÚßÁçÓ`Е3’èjHhn€ó‚±¯àÄ¢¡…¿¶Øïº¦ûß ™û¹ørð¿÷þ¤øõÍ wÿ‹Å—í‘~ã•éŒT/ͪ *Ž(ÓËzfò$@:5q²$±!åLgcœ=R(NµyÃUÂÑEµªIŠöXà¤Wó"÷÷äîzä‚þd-rú‹aÔ¨òHÚÑ·ÜÁ^D‹‰Þê Î&[鹡A…ëÁ¾"8hÚmÌ4¦ ÌÓ;;_ô‚ÉC[ìw´°J Ê›<ÊBPZ]ód*¹òÍ«™m¥qàD¦••EOdÔ6Éš0UF™Pw9‡¤cfgZSS@æä *˜OyM(ýg¬V^LoTKçž¾;<¾‹Ë„IÇŽƒ’|lsßÊò!äÅr^`M K"—jHÿ'W•6ŠºÛÊÔuH1q¥/T˜Gªè•W7ÀEhv+«ž7ÿU Ö„°Ì|ýL)œ±¨A}±DA«Ð+–.:Hú¯I3.€Maóâ«ßR!C‹Öè…ꘇ`uÙg–&7å³WX¼Àä›&Ïq½Z¥—”’»Âµ†¢¼Rd£Ny3(nŽ®B€R­¹¢²¾B–éN4ÇZSWÖŠØ¢déi»~¸ái×5DTƒ_Ž~t©cƃ³(Ê9†ÌÆff>ûuQ5/j7³7µéš”Å"áŠ+ò®Ý¢8]y] "=Dê Í刦g¡l3¤ ´`NwàKKÌGœ‹¡ð>r‚½S”¸«¾.!ñ%à¾LÔÊŒc˜ÌJóëâJöËI”PK°Ž ÇÕ<#GÊiYÕdµ]MCDk«tËÖ‰XËÖê€þã±"¯ûêG§Ù‘IËPÑnÐpŒ!ì»â臀ÙâtJkHÆH¿UXƒG+îú©±)Ñ#—Ìï`š´ì¡ÖÌAg3¬›C×fs•‚—åNAb ÈÝ¢ŸMqœKK1ßu‘&=OOU6ƒbãM9qÕBR5¤±Ó&‹ï®Ìˆ«|ɸ©tðŠì^£[î*úzLƒƒ™eHSÛIÉrBôUwb÷!Ôéä r¦´nï °6¶B„C20v«G2¸ÙVíX È_ UIàÄ]Ñ‚§ÜMž¥ó}–GoK*ÁÝ8€PÔyf†Ù‚ƒîóþ• ±Î›ùÔB¬ŽÚÀhúk›–Y t³jCînG„CR}ß0§Óøì"­øk;ñC|l.ÅdÞÎÊCxG©o¼("nçÙM|‹ëô—E‘°Ó§ŠuVTŒ©ÁõET-OaŸk+üÊ*¶­s°Í´Ù¤0ß(¬œUÐÉ?euÉÞªg›pZVr¶Q"5Æ)ªãkt‚Ð:Ë ©>³p˜Ñq'À¡)Êë÷„'ø{éǸ߶LÐMaüj6[s4ìO™x¨CŸ–cë€7%ž‰$<Ñh TXíµ) Z¡jFèl‘š¬Ê#âÊRj'KÖ*‡+ ´ˆ H¦q“áš&öØãí XÁ]ÿéëª Èä’LªóHö ŸØÆP¸8ø>g ¶Õa ´Ö”3óÉ–6®­Pg\ ªÒº‰ksæö%:êŠRµj£†Æs‹¶§:?ÅX*Þ²:¢Lºæ6žajÍÝò<'m¡üë, 8àÿ°ô£6ƒ)}¬$ر¾µ:‚1Sjá”ûÚòÂJÊÛ ïœVðNALèYCfT À–z~ÖDªíhøÿ_7àÎYÞËœ^ÉåœÊáÇŠ#K+V9:¡IÓ^Ï c¶ñÓŠüììdë©-p=踇„õªòº‡0{X}è5UIu°žWî‚·½ë¸ìa÷¿ykŒ’*;xÊ*,sÊÉtÄç!6ÅWÄ£²ˆ~î  öF`¨×6vÝ©Ä-}æÃŸî,Q ÆPœ†oŒj¶ݾ–æ„ØDw§ìJÀgé“j[žæŸª Ñ\O še[|æ½ Îû q,¥îe¢7õ;;/XË*>çå’€³H ¡²ç¬q7qI8'DLÇút¾6ðÊÖyŸ™5h;õ¡²7hÔÍ’ºSÈ[±ý‚L5$KlÒ˼²8•á Í«[2³j¥–X…”tê­Šµ…Õá¬&úéqé + u:§už‰Þ-„!Qtò€Ç‰Q.¡ã"5„HUªÎ]™1¡3-*:â®\·¤Ë¿ú°&a—E]s‘§Ò:DQlK GªbëƒqÅ|ÑÔ4lT‡¦NpÿÅ“¢ñJò,œ=‡eÜóò5¬ÁNšeh«‘ýÄlt- !D[;œúT fˆxöã§óÓoy‚¦ Cd!h†£WS‘QÑöš‘#aN„,剕«Ú\øp¶ö“árù¤§îÛ¨cËN’ö”¶‘åw³Dœ(F›™Én#︵;  !oM/ÁûS?E‰þ;tßýD. 
Üé/bø×ÊÍÿüA|!úõíˆA8¼K!¼ §BC±mà·íþß8«0†¬¶*&Wri)ŽÎö`ŒIÁ7oqIçJC+ ïA{f8DˆzÿÑl}ø¡ø-}`]í{ç¥È¥àP‡† ÈÌVô¸'äFsÿâ­,!dU«Š¥üGC¥EWœçãz7EÛjñÄÅõ– ï฽(éD9Âé(ãe¹)øAÄæVŒ– #Ln¾oh§SŒ‡n8j×& ÏݰÛOÕh—h0Û+¡nheßèYÙTyܼJ«î%'Bls ´-øh°À+8´eq°¨µ{rüú`7sq¢Žc ÞÒ¿Ì%d_- ^?¨ï@dýФdl¯µÕ1Dq£Ï#ÍY]}D‡@–ÉõzíÖ”#¨àˆ}¡Z­› }/ ØîõæN_JdIt.$±æ×£xDãÂtß‘$úYÏ!ehFo-Kdö@‘®ŒÕõËB꼤~Z™ƒDgZU ™eèµ¾»‡cHp‰öŒ%WµÜÑ÷:êQËÕÅvØNáy ä¦­í¹‡:*Ÿ¡Ú8 twŒù޹º¶ÁžÆc4ÕÅXµ…ç¸ñ˜Ž‘úèj8­  jçtüsÛíjAñ‰`ãéÔE¥Ù‚! x=”¦:cT˜¶Vbµ:3lg„1)Fs°$1g Ô3-%u\¶ñ§ ïpË9Š_¨.,$}å 5°ÓX3‰LLCA$·£ð[¯ßuàbC¯Qñy{Œ½QmL¶e!ÏiIbÀú'o^)ì=êR#CјÒfqÿyÅPTq9øç"<é[Aò¯~ȆƋ§_²$[ŠA$îH‡vÞhZD›q.õfA길ÄÅ|:éåǽÅ=7Ž7+„üI/«={‰×…Ùî­+‘¿!ÅsJð¬}Ž**Ñþ"Çö® 0"[„U#×è\Ýk¼g½»Nò¾+°V1L^PÓ7³[íAÚÓÀ‰§JŒ;õÓ ¯¤P§÷¬±ÒVÈ+O8ؼ ÕØêK—H52›Ó"}]KÏù²ƒtÞZí{C#³/õ©¶#­§qÞÚ®9á"-îš@mñÄ>MÃ8k‰L[A3©àÂsNêÂ]rÀá…/æ¾ýŒU‘®*‡æ Ÿ?MP«cqzxvˆ5tYNcÅ6]árY>u¯B2wX«;—I& ˆl9»ð–sSá¹oìÏbæW £ë8Ks£2Ei—®ÐÔ¤ ¤¬ðìbEìžéSAÞå-K2¯õµ$¿Û дì9x$§r•tâb7˱O–l«-s·ôŒêÌ2/šË'¾¶e•N‚™¿á¸µsu¥k[îEâôâ¦Zž×ƒ7¥ÑVIJÎFèÒ1Oa¢¶]Ëçw„džƒç9êºpÏ™°Êž— Ý Eg½‡îVc·SñÊ«<`pî±.ÃO€ír±Zè ñbÍi-]QZëâ¢-É<ÊîM5ÝkW>2¥fñ„"™´roæâ½ÏÞ,¨ü.}\ûÞèQ[ŒWŒ©IK£û}Ú˜º“§ÂKP/ð× ÚérÀ/èпñ+ô˜ÑîÇ:­\ w;Ôj·Ä¬v3ðHP;¬KI@"1.ÿ_欯ya|¨g…‰ k™/ WÅ".^’Çd@Œµåãà>>µ¢¬´ñÝ_þ¢?â¾ÌÞää YŸ{0±'O¾%Ù*-;d~ÎYüìùŠÖ^ú¶pü›}hžÅßZ7_".d;y…•ãçÅê Ô +™ù:1 P¸já,[hÉ.x,«0.åëqeF[M¦zé”…¥[á‘8Œc§L§zQš8¬;ê<¢‹riv£";â²RéwWaôö¿ ïžÜj¯Êgi.+} `×ExU*m\R( ‡oU,‹¤›my/½£"k·øŒcá¼ÚÒ·°bSÚNÀ÷:^ÐÜ{Ån‰³\1}ÕÒœ{6 ô¦kŸ‘ïåMe𬲱â` X-8™À»*Â-½f `eYq8Ôw1Ÿ•|ï†5Öã ‚¬`^ ³L25òY³p s2ÀĪYcàSßÅœ¦ZÍøn˜ca=æ È æà…<Ë4S#Ÿ9k×0 ÌA¬š9v>õ+˜C -ë8ÄgO\>¹ÍÚÌRЫYÆw‡£š/1o°i›õ|$ð03¹‡£îPCãìbîx½sÄÇ<wØ,ZÁÊqØnvkÐ5œ‡mרµ]ãí¢>º… +…†Q´Ç(ZÁ(ºÄk™vnæ3jÐ5ŒBУ±f”3Ö(ºU•×+ùß ›,¬Ç%YÁ$¼Ík™ljä³h-àd€?ˆU³ÇÀ§ÞŒ®óGë¦úý—7½¼À`þW5iÒÚý5}^ê€0XâÕa-þÝV¶þÞÝ|_ 0¨ÍNK].sºÕ­¨cý[¨£ kqœûĖظtÌà¶ì½¥ˆÓ¢]ßÎÒ3D±µmW´µta|·ù@Ý3åAàË‚]ôÐá•pKèàeÂ.nmtãû–½þäÞ‚þzK5s_º?›c}Uè¾YïÓiïô0ÙÇÿ‹0½È¢‹ª³Yð¦#<$LJ1sSŽ ³’=ÿ¤¨‡‹6üàNq½‘xåe'Îùa}ÑY²èû¸û»­iËo .… ÎEU¥cº×nçºN|Îq¾ô'@ÜC¢&çî+ÞÖq»½ød5U­³ž©¹ìvéâÚv!ú /?xmPw[Ï{ÜE» ºö­´¡¼çý´›ô¸©v£fËwÖnÔ,t{í& 7ºÇ6(üo´ ´þð»mW躹!«eLÇã½´=DJjcR3s©6|¿FûÑFtAJˆVL½Â»ì®eB÷4: û¦Ð5X“wZñZŽNRî7|ñ—£×âèõÑüóò‡þmY\§‰¬¬—\̉üG“·út0‰ïã¶ÄZ^àb%ð ƒkõX~JKq ÷ç 
¿Ä“Eš„­âu?—t‚Â![ìˆ]±'ö;À ÒQ»Ó²î?—,D2悾˜/è#³‰Eì²…ÚiÃꩽ#¤›áÁV®RÎA﹎±žŽä‹Áqkź¹LM/F-ù¿xý<(}çÇ·‡gß=mÝ–ràüN¿òKóÅÂŒñ€ªÁõüðÅ«7¯Ÿ¼É•ÌÛ£çÏNžbhP6¡Ï/Ž_<½Ïpô9òeûúðÕ‹§­fÜëùé‹úBìu?¿8}öt»[nÛa¾ ýwâ>£¿ü•Üa<⓳ŠoMîëû«0ø=ó—¾f´œNŠH÷oÄׂò_11Ððúo¢?å+þ0rGü(¾F³á÷óh Ìù:MÃô¨rpð[R\Pìõà¡øÙ툨úBô¡&~\¦æs“Ä* ï#ßÊwßJðÀù9Ô£ÿs"«gWûjVõû&öûÅd7ü D'dî+õÂ%2‡„~í¯s¿¯¼xóöìÔýuwŠE›9aÒ ÚìƒCÃ3èû.`7QïGëzÚ(ƾg‹[lÓ ÿ:P›ÎsšQopõðïaézÉ_š5þuÈÅï÷©½x*¶)lÚªé‹)huSÊ‹y1á³Ø^sç'Ä2ù¢Æ_5‹íÎÙÂ]Q˜¶íá]š-õÐCÂXtBŽr/àÇ ÞµýáT/(6â+üëZt¨×Þºú~Ôv0ù®×Ÿ>9Ó¹0‚k±zá»|/Ò;´ÿôÞ•T»ö´=-Ľðü€÷¢ÕÊYú7´ª5_}˜0ÖYá/nJH‹Bio—RåÓ&‡ˆõ±økÃ+þ9>9Øß1{ 3ÓÑàĢíû€`˜éñ»dð»T n‹/žÚ÷ÇæMºÑ€†¿ó Õ$ò&ËÞ×l !ô{)N¦µ$OìM˜öàò§PÚÖ¹}´-¼XA°*º¢ÿ<`¯ÜÏ86¦< ï0ɤqlC'Èfí»ÚhdžŽûv¿|ýu‹Àbñp5Åb±xÁôÒî÷áW‡Ê®‘ºøá½Çô‘|Žë¦òFƯ.ŠéW‡Ö%jOMtáT`µ4ÚÊ®þ°$Ou…!ÿ÷¦ˆûCñ3³ÇùKH¿0…¿n‹o~¿ãâ$F$È*ž„牚]ÿî?@}÷ü[Ô†¢¼ü—öÁå}¿ãï¿÷v‡ûŸF»»ÃÑ£áî.þøÑþîÝßÿ-w !TÀé,ÛlT¬Ù¨D³AafƒrÌÊ"LGéå} .Ÿ´Ì²Iqe©¤²õñå“pÑ$X*é,زH°²¾²Õ]îØê.mlmRÆX_¼•, ²4{ԼĊ²C Ø°i‰#Ù.'¬-"üªÆ ÊŸ®8°T€žº’þVjò± þ§MëMrA‚xÿÄýÓ¤ë>[š†|ò$ä·H7$}EÂ葎êè¦&é ¦.'x6Ýh'lëÄmãtÍ$4alá4l³tçS¶Óµƒáìè#s"FöÅ–O’õè\QS^³u—ÃÜ=wÏÝs÷Ü=wÏÝs÷Ü=wÏÝs÷Ü=wÏÝs÷Ü=wÏÝs÷Ü=wOèù?f ‘L sample/auroot/Robert.Wotzlaw.txt000066400000000000000000000112071315652647700173350ustar00rootroot00000000000000Date: Thu, 1 Jul 2010 19:24:21 +0200 (CEST) From: Robert Wotzlaw To: aufs-users@lists.sourceforge.net Message-ID: <1011860217.773505.1278005061675.JavaMail.fmail@mwmweb073> Subject: Publish the script bldchraufs "Build a chroot environment based on AUFS" Mr. Junjiro R. Okajima, it would be a great pleasure for me, if You could publish the script on Your proposed git repository under the address aufs2-util.git/sample/auroot/. I have made some corrections and rewrite parts of chapter ten. Append the script hal.new at bldchraufs and renamed bldchraufs to bldchraufs.aio. The extension "aio" stand for "all in one". Create the new chapter four with instructions how to extract the scripts bldchraufs and hal.new from bldchraufs.aio. Please, could You take a look of the new version. 
Before I answer to Your suggestion and question, I have apologize me. You found to letters with the same contents. This was my fault. I haven't realized that the email server sends a copy of a received email to all listed members. I thought, web.de my email provider couldn't deliver the email to You and so I send it a second time. That's the burden of the electronic life. Your suggestion putting all the code in control structures is clearly the better way, but - and now come the great but - the idea behind the script is a simple introduction for the build of a chroot environment with the aid of AUFS. I think, the proposed control structures will hide the idea. For people like me it wasn't very easy to understand what AUFS can do for us. It was like reading UNIX manual pages. My experience is, UNIX manual pages aren't aids for novice. It's more a hint, go a head, look for an example and come back if you have enough experience. I personally learn much by examples and in the most cases I get the idea. With this experience in mind I wrote the script. The script should be more a base to help manifest extensive ideas based on AUFS. The writing of the script gave me more experience to make better decisions concerning AUFS and the executing of an operation system under a chroot envi- ronment. Perhaps other peoples want participate on this experience. During the writing of the script I thought, why not give the user a switch in the GNOME Desktop log in screen to run the session in the chroot environ- ment. A second thought was, why not building up the chroot environment on a persistent base. The realization of the first idea is beyond my knowledge. Perhaps other people grasp the idea. The second idea is clearly a task for a script, that need control structures for its realization. And now I will answer to Your question concerning the hal.new script. The script hal.new is a corrected version of the start up script hal. 
The propose of the hal script is the start of the HAL (Hardware Abstraction Layer) daemon hald. HAL is one of the interfaces between Udev and D-Bus. Without a working hald the NetworkManger, UPower and the PolicyKit have problems under a running GNOME Desktop session. The Execution of a GNOME Desktop session in a chroot environment needs a new start of the dbus and the gdm scripts. At start of the gdm script the script checks if a hald is running. If not, gdm start the hal script. The starting hal script checks if the script is running in a chroot environment. If the answer is yes, the script exit with the following error message: Can't start Hardware abstraction layer - detected chrooted session You find the code that checks the chroot session under the line 64 in the file hal.org. The file hal.org is a copy of hal. Hal is located in the directory /etc/init.d/. In the Internet I found a discussion between a Debian and Ubuntu developer, concerning the above mentioned error message. One of them suggested that the check shouldn't run with the start option. The check should only run du- ring a restart of the script. The other agreed. I lost the address of the Internet site. In the changed script hal.new I pushed the code in the function check_hal_ prerequisites() and put the function into the case assignment under the restart|force-reload branch. Before the start - see chapter five, step nine of the script bldchraufs - the user replace the script hal under the directory /tmp/jail/etc/init.d/ with the contents of the script hal.new. The new contents guaranties an error free start of the hal daemon in the chroot environment. Regards, Robert Wotzlaw Attachments: 1. All in one file bldchraufs.aio,v 0.1 2010-07-01 16:57:54+02:00 2. HAL init script hal.org ___________________________________________________________ WEB.DE DSL ab 19,99 Euro/Monat. Bis zu 150,- Euro Startguthaben und 50,- Euro Geldprämie inklusive! 
https://freundschaftswerbung.web.de sample/auroot/auroot000077500000000000000000000036001315652647700151260ustar00rootroot00000000000000#!/bin/sh # Copyright (C) 2005-2017 Junjiro R. Okajima # # This program, aufs is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #set -x tmp=/tmp/$$ set -e test $# -eq 0 -o "$1" = "-help" -o "$1" = "--help" && cat << EOF 1>&2 && exit 1 usage: $0 [--size rw_size] dir [command] Makes a jail or chrooted environment under the given dir for the given command such as chroot(8), but it is based upon AUFS. If you want to hide/modify something under /, then remove/modify it under AUFS or customize this script. This script includes a sample customization for apache on debian system. e.g. "sudo sh ./auroot.sh /tmp/jail /etc/init.d/apache2 start" EOF size=64M test "$1" = "--size" && { size=$2 shift 2 } dir="$1" mkdir -p "$dir" "$tmp" mount -t tmpfs -o size=$size none "$tmp" # a sample for generic mount #mount -o ro,rbind / $tmp.ro #mount -t aufs -o br:$tmp.rw:$tmp.ro aufs "$dir" # a sample for my test system, # which has /, /dev, /var and /usr on separated partitions for each. 
f() { mntpnt="$1" name="$2" rw="$tmp/rw.$name" mkdir -p "$rw" mount -t aufs -o br:"$rw":"$mntpnt"=ro aufs "$dir$mntpnt" } f / root for i in dev var usr do f /$i $i done # a sample for apache2 on debian f="$dir/etc/default/apache2" test -w $f && echo 'NO_START=0' >> $f exec chroot "$@" sample/auware/000077500000000000000000000000001315652647700136435ustar00rootroot00000000000000sample/auware/README.txt000066400000000000000000000222441315652647700153450ustar00rootroot00000000000000 Aufs for VMware Appliance Junjiro R. Okajima o Introduction ---------------------------------------------------------------------- A recent trend for VMware and Xen tends to consume much disk space. For instance, a typical VMware appliance is distributed as a 8GB disk image and you need such large disk space for each virtualization even if the filesystem image in it actually consumes less than 1GB. Additionally every virtual machine has the same files mostly. When you construct three virtual servers from one VMware appliance, you will need 3 x 8GB plus alpha disk spaces. More and more virtual servers, more and more disks are needed. The "plus alpha" means the part of differences between servers. But the actual necessary files are single sharable system files, 1GB for example, and the part of differences. With AUFS you can share the common part of virtual servers, and stores the server specific part individually. The basic approach is such like this. 
- extract the actual filesystem image from VMware appliance which we call it "common system image" - construct an NFS-exportable aufs with an empty writable branch and the readonly "common system image" - boot a virtual server with PXE and nfsroot, mount the exported aufs as its root filesystem - so you need two or three server softwares but they can live in a single host + NFS server + TFTP server + DHCP server (optional) Finally you can save much of disk space, 3 x (8GB + diff) > 8GB + 3 x diff In this document, I will describe the sample steps to build such environment. o VMware appliance ---------------------------------------------------------------------- Here is an assumption. + you already have VMware environment. you can get and start any VMware appliance, and know how to customize it. In this sample, we use debian appliance where you can get http://mirror.o-line.net/vmware/debian-4.0r1-netinst.7z - unpack it $ 7z x debian-4.0r1-netinst.7z ::: Extracting debian-4.0r1-netinst/debian-4.0r1-netinst-f001.vmdk Extracting debian-4.0r1-netinst/debian-4.0r1-netinst-f002.vmdk Extracting debian-4.0r1-netinst/debian-4.0r1-netinst-f003.vmdk Extracting debian-4.0r1-netinst/debian-4.0r1-netinst-f004.vmdk Extracting debian-4.0r1-netinst/debian-4.0r1-netinst-f005.vmdk Extracting debian-4.0r1-netinst/debian-4.0r1-netinst.vmdk Extracting debian-4.0r1-netinst/debian-4.0r1-netinst.nvram Extracting debian-4.0r1-netinst/vmware-0.log Extracting debian-4.0r1-netinst/vmware.log Extracting debian-4.0r1-netinst/debian-4.0r1-netinst.vmx Extracting debian-4.0r1-netinst/debian-4.0r1-netinst.vmxf Extracting debian-4.0r1-netinst/debian-4.0r1-netinst.vmsd Extracting debian-4.0r1-netinst ::: - check the disk partitions $ file debian-4.0r1-netinst-f00*.vmdk debian-4.0r1-netinst-f001.vmdk: x86 boot sector; partition 1: ID=0x83, active, starthead 1, startsector 63, 15952482 sectors; partition 2: ID=0x5, starthead 0, startsector 15952545, 819315 sectors, code offset 0x48 
debian-4.0r1-netinst-f002.vmdk: data debian-4.0r1-netinst-f003.vmdk: data debian-4.0r1-netinst-f004.vmdk: data debian-4.0r1-netinst-f005.vmdk: data It shows that there are two disk partitions, one is from 63 sector and has 15952482 sectors, the other is from 15952545 sector and has 819315 sectors. The latter is swap space, so let's ignore it here. - extract the filesystem image $ cat debian-4.0r1-netinst-f00?.vmdk | dd ibs=512 skip=63 count=15952482 obs=32m of=etch.ext3 $ fsck.ext3 -nf etch.ext3 $ sudo mount -o ro,loop etch.ext3 /mnt $ df /mnt Now you get the ext3 filesystem image which is stored in the VMware appliance. In this sample, we create a brand new ext2 fs-image and duplicate the system files since it will be an unchanged readonly fs and ext3's journaling feature is unnecessary. $ dd if=/dev/zero of=etch.ext2 bs=1M count=1k $ mkfs -t ext2 etch.ext2 $ sudo mount -o loop ./etch.ext2 /tmp/w $ sudo chown 0:0 /tmp/w $ sudo rsync -aqHSEx --numeric-ids /mnt/* /tmp/w $ df /tmp/w Let's make sure that our files are all fine. $ cd /mnt $ sudo find . -printf '%M %n %U %G %s %t %p %l\n' | > sort -k 11 | > awk ' /^l/ {print $1, $2, $3, $4, $5, $11, $12; next} /^d/ {print $1, $2, $3, $4, $6, $7, $8, $9, $10, $11, $12; next} {print}' >| /tmp/l1 $ cd /tmp/w $ sudo find . -printf '%M %n %U %G %s %t %p %l\n' | sort -k 11 >| /tmp/l2 $ diff -u /tmp/l[12] $ sudo umount /mnt Now we have the base system under /tmp/w. Next let's customize the common part. - customize the common part In order to disable the root and swap partition on the local (virtual) disk, edit fstab. The file will be such like this. $ sudo vi /tmp/w/etc/fstab proc /proc proc defaults 0 0 #/dev/sda1 / ext3 defaults,errors=remount-ro 0 1 #/dev/sda5 none swap sw 0 0 /dev/hdc /media/cdrom0 udf,iso9660 user,noauto 0 0 /dev/fd0 /media/floppy0 auto rw,user,noauto 0 0 In this sample, we use 192.168.1.2 as local DNS server. 
$ sudo vi /tmp/w/etc/resolv.conf nameserver 192.168.1.2 In this debian VMware appliance, the server to retrieve the packages is registered in /etc/apt/sources.list, and it still has CD-ROM entry. Let's delete it. Of course, you can customize the package server as you like. $ sudo vi /tmp/w/etc/apt/sources.list #deb cdrom:[Debian GNU/Linux 4.0 r1 _Etch_ - Official i386 NETINST Binary-1 20070820-20:21]/ etch contrib main deb http://debian.lcs.mit.edu/debian/ etch main deb-src http://debian.lcs.mit.edu/debian/ etch main deb http://security.debian.org/ etch/updates main contrib deb-src http://security.debian.org/ etch/updates main contrib If you don't live within the timezone of EST, it is better to change the timezone too. $ sudo chroot /tmp/w tzconfig Now we finished the customization of the "common system image". Unmount it and keep the filesystem image. $ sudo umount /tmp/w o AUFS on NFS server ---------------------------------------------------------------------- In this sample, I will not describe the generic NFS server issues. Refer to the other documents if you need. Building and loading the aufs module is easy, but you should enable CONFIG_AUFS_EXPORT and some other configurations. It purely depends on your kernel version, and is described in other documents in aufs. Please refer them too. Here is a sample step to mount and export aufs. $ mkdir /tmp/ro /tmp/rw /tmp/u $ sudo mount -o ro,loop etch.ext2 /tmp/ro $ sudo mount -t aufs -o br=/tmp/rw:/tmp/ro=rr none /tmp/u $ sudo exportfs -i -o rw,async,no_subtree_check,no_root_squash,fsid=999 \*:/tmp/u Please refer to the other manual if you want to know how to use exportfs command. In this sample, I just inform you some important things. - export it with remote root user can write - specify fsid since aufs has no own real disk device o New VMware appliance ---------------------------------------------------------------------- In this sample, we create a new VMware appliance which doesn't have any local disk. 
So it will never swap-out. Of course you can create local disk and use it as swap area if you want. Here we just create a new virtual server by VMware Server Console. I hope you know it already and I won't describe here. But this sample uses its MAC address in next section to set-up the TFTP/DHCP server. o TFTP/DHCP server ---------------------------------------------------------------------- This part is entirely depends upon your environment. On my test environment, I used dnsmasq and tftpd-hpa tools. Here is what I did. - /etc/dnsmasq.conf # assign 192.168.1.99 for a vmware debian dhcp-range=192.168.1.99,192.168.1.99,12h dhcp-host=00:1a:2b:3c:4d:5e,192.168.1.99 # boot it with pxe dhcp-boot=pxelinux.0,jrodns,192.168.1.2 # gateway dhcp-option=3,192.168.1.1 # dns server dhcp-option=6,192.168.1.2 The MAC address here is created by VMware Server Console. You may not need it in your environment. Now restart dnsmasq daemon. Fortunately, the VMware appliance we use here supports nfsroot and we just need to pass some parameters to boot it. Also you don't need to use MAC address as a filename of configuration. You can freely customize your TFTP server. The important thing is booting the VMware appliance with nfsroot. - tftpd-hpa $ cp /usr/lib/syslinux/pxelinux.0 /var/lib/tftpboot $ mkdir /var/lib/tftpboot/pxelinux.cfg $ vi 01-00-1a-2b-3c-4d-5e default linux #prompt 1 timeout 600 label linux kernel auware/etch/vmlinuz-2.6.18-5-686 append initrd=auware/etch/initrd.img-2.6.18-5-686 root=/dev/nfs rootfstype=nfs nfsroot=192.168.1.102:/tmp/u,v3 ip=dhcp noresume - copy the kernel and initramfs image from the vmware appliance to /var/lib/tftpboot/auware $ mkdir -p /var/lib/tftpboot/auware/etch $ cp -ip /tmp/ro/boot/vmlinuz-2.6.18-5-686 \ /tmp/ro/boot/initrd.img-2.6.18-5-686 \ /var/lib/tftpboot/auware/etch Of course you don't need to use DHCP if you don't want. In this case, you will change the kernel parameters too. 
---------------------------------------------------------------------- Now our preparations are all completed. Boot your virtual server from VMware Server Console. It will boot with nfsroot and all the modifications will be put under /tmp/rw on the NFS server. Enjoy! sample/brsync/000077500000000000000000000000001315652647700136575ustar00rootroot00000000000000sample/brsync/README.txt000066400000000000000000000127341315652647700153640ustar00rootroot00000000000000 Brsync -- synchronize files between two aufs branches J. R. Okajima Let's assume a system like this, - aufs with only two branches - one lower readonly branch on SSD - one upper read/write branch on tmpfs - using the system for a while, usage of the tmpfs grows - you may want to reduce usage of tmpfs and regain the system main memory - usage of SSD never changes since it is readonly, even if you remove some larger files in aufs - you also may want to reduce usage of SSD - yes, it is ASUS EeePC. :-) In this case, I'd recommend trying the aubrsync script in the aufs2-util.git tree. It executes rsync(1) between the two branches. SYNTAX ---------------------------------------------------------------------- aubrsync Options move | move_with_wh | copy \ mntpnt src_branch dst_branch [ options for rsync ] generic form: aubrsync [ -w | --wh ] [ -i | --inotify ] Options \ mntpnt cmd [ parameters for cmd ] Options: [ -n | --dry_run ] [ -q | --quiet ] ---------------------------------------------------------------------- SIMPLE EXAMPLES ---------------------------------------------------------------------- 1. # mount -t aufs -o br:/rw:/ro none /u # aubrsync copy /u /rw /ro The script executes rsync(1) and, - remove the whiteout-ed files in /ro - COPY the non-whiteouted files in /rw to /ro 2. # mount -t aufs -o br:/rw:/ro none /u # aubrsync move /u /rw /ro This is similar to above except COPY. The operation 'move' removes the non-whiteouted files in /rw by rsync(1).
After rsync(1), the script finds all whiteouts in /rw and removes them too. After this aubrsync, /rw will be almost empty. For the operation 'move_with_wh', see the sample for 'shwh.' ---------------------------------------------------------------------- EXAMPLES IN DETAIL ---------------------------------------------------------------------- The dst_branch must be mounted as writable. During the operation, the mntpnt is set readonly. If you have a file open for writing on the writable branch, you need to close the file before invoking this script. The -w or --wh option requires CONFIG_AUFS_SHWH enabled. The -i or --inotify option requires CONFIG_AUFS_HINOTIFY enabled. 'copy' is a shortcut for aubrsync mntpnt \ rsync --exclude=lost+found -aHSx --devices --specials --delete-before mntpnt/ dst_branch 'move' is a shortcut for aubrsync mntpnt \ "rsync --exclude=lost+found -aHSx --devices --specials --delete-before \ mntpnt/ dst_branch && \ find src_branch -xdev -depth \( \ \( ! -type d \ \( -name .wh..wh..opq \ -o ! -name .wh..wh.\* \) \) \ -o \( -type d \ ! -name .wh..wh.\* \ ! -wholename src_branch \ ! -wholename src_branch/lost+found \) \ \) -print0 |\ xargs -r0 rm -fr" Note: in most cases, you will need '-i' option, and find(1) is invoked by aubrsync only when rsync(1) succeeded. 'move_with_wh' is a simple variation of 'move' which moves whiteouts separately before the actual 'move'. examples: - Copy and reflect all the modifications (modified files, newly created and removed ones) in the upper branch to the lower branch. This operation is for aufs which has only 2 branches, and mainly for a system shutdown script. All files on the upper branch remain. $ sudo aubrsync copy /your/aufs /your/upper_branch /your/lower_branch - Like above (2 branches), move and reflect all modifications from upper to lower. Almost all files on the upper branch will be removed. You can still use this aufs after the operation. But the inode number may be changed.
If your application which depends upon the inode number was running at that time, it may not work correctly. $ sudo aubrsync move /your/aufs /your/upper_branch /your/lower_branch ---------------------------------------------------------------------- NOTE ---------------------------------------------------------------------- Since aubrsync handles the aufs branch directly (bypassing aufs), you need special care. One recommendation is to execute it in the system shutdown script. It will keep the source aufs branch from being modified, and you can copy/move files safely. Otherwise you need to enable CONFIG_AUFS_HINOTIFY and specify -i option to aubrsync. The -i option remounts aufs with udba=inotify internally and executes 'syncing'. However, even if you use -i, other processes in your system may modify the files in aufs. If it happens, the files copied/moved to the lower branch may be out of date. ---------------------------------------------------------------------- Some tips for ASUS EeePC users. ---------------------------------------------------------------------- o log files - Generally the log files do not need to be stacked by aufs. - Exclude them by mounting tmpfs at /var/log (like /tmp). Recreating directories may be necessary. Customizing /etc/syslogd.conf is good too. - If you want to keep them even after reboot, forget about this approach. o xino files - The xino files should not be put on the SSD since they are written frequently. While I am not sure how much it damages the life of the SSD, I'd suggest you put them in tmpfs. o ~/.xsession-errors Currently it grows unconditionally, and I'd like to suggest you remove it just before starting-up the X server. You may want to keep the old contents of the file, in this case it is better to rename it to .xsession-errors.old or something. I am afraid this file can be a source of disk space pressure.
---------------------------------------------------------------------- sample/logrow/000077500000000000000000000000001315652647700136705ustar00rootroot00000000000000sample/logrow/README.txt000066400000000000000000000014411315652647700153660ustar00rootroot00000000000000 Logrow -- expand the size of the mounted loopback block device J. R. Okajima Some Linux filesystems can be grown while they are mounted, using a special tool. For example, the patched EXT2 and the native XFS. While the filesystem supports growing its size, the loopback block device doesn't. The logrow patch, which was merged into linux-2.6.30, and the utility logrow.c expand the size of the loopback device. If you specify its backend file, then the utility expands it too. You don't have to unmount it. It may be useful for aufs users who want to use the loopback as a writable branch. - use linux-2.6.30 or later - make the logrow executable you should make sure that the header file include path points to your kernel tree. - read test.sh - and you will know how to use it Enjoy! sample/logrow/logrow.c000066400000000000000000000070761315652647700153570ustar00rootroot00000000000000/* * Copyright (C) 2005-2010 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details.
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #define _GNU_SOURCE #include char *me; void usage(FILE *f) { fprintf(f, "%s [options] loop_dev [backend_file]\n" "-s, --set new_size_in_bytes\n" "\twhen backend_file is given, " "it will be expanded too while keeping the original contents\n", me); } struct option opts[] = { { .name = "set", .has_arg = 1, .flag = NULL, .val = 's' }, { .name = "help", .has_arg = 0, .flag = NULL, .val = 'h' } }; void err_size(char *name, __u64 old) { fprintf(stderr, "size must be larger than current %s (%llu)\n", name, old); } int expand(char *fname, __u64 new) { int err, fd; __u64 append; size_t sz; ssize_t ssz; const size_t one_g = 1 << 30; struct stat st; char *p; err = -1; fd = open(fname, O_WRONLY | O_APPEND); if (fd < 0) goto out_p; err = fstat(fd, &st); if (err) goto out_p; err = -1; if (new < st.st_size) { err_size(fname, st.st_size); goto out; } append = new - st.st_size; sz = append; if (sz > one_g) sz = one_g; while (1) { p = calloc(sz, 1); if (p) break; sz >>= 1; if (!sz) { errno = ENOMEM; goto out_p; } } err = 0; while (append > 0) { if (append < sz) sz = append; ssz = write(fd, p, sz); if (ssz == -1) { if (errno == EAGAIN || errno == EINTR) continue; err = -1; break; } append -= ssz; } free(p); if (err) goto out_p; err = fsync(fd); if (err) goto out_p; err = close(fd); if (!err) goto out; /* success */ out_p: perror(fname); out: return err; } int main(int argc, char *argv[]) { int fd, err, c, i; __u64 old, new; FILE *out; char *dev; err = EINVAL; out = stderr; me = argv[0]; new = 0; while ((c = getopt_long(argc, argv, "s:h", opts, &i)) != -1) { switch (c) { case 's': errno = 0; new = strtoull(optarg, NULL, 0); if (errno) { err = errno; perror(argv[i]); goto out; } 
break; case 'h': err = 0; out = stdout; goto err; default: perror(argv[i]); goto err; } } if (optind < argc) dev = argv[optind++]; else goto err; fd = open(dev, O_RDONLY); if (fd < 0) { err = errno; perror(dev); goto out; } err = ioctl(fd, BLKGETSIZE64, &old); if (err) { err = errno; perror("ioctl BLKGETSIZE64"); goto out; } if (!new) { printf("%llu\n", old); goto out; } if (new < old) { err = EINVAL; err_size(dev, old); goto out; } if (optind < argc) { err = expand(argv[optind++], new); if (err) goto out; } err = ioctl(fd, LOOP_SET_CAPACITY, new); if (err) { err = errno; perror("ioctl LOOP_SET_CAPACITY"); } goto out; err: usage(out); out: return err; } sample/logrow/test.sh000066400000000000000000000027211315652647700152050ustar00rootroot00000000000000#!/bin/sh # Copyright (C) 2005-2010 Junjiro R. Okajima # # This program, aufs is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # $Id: test.sh,v 1.3 2009/01/26 06:24:45 sfjro Exp $ tmp=/tmp/$$ img=$tmp.img dir=$tmp.xfs set -eux dd if=/dev/null of=$img bs=1k seek=16k mkfs -t xfs -q -b size=1024 -f $img mkdir -p $dir #sudo mount -vo loop $img $dir dev=$(sudo mount -vo loop $img $dir | tail -n 1 | sed -e 's:^.*loop=\(/dev/loop[/0-9]*\).*$:\1:') test $(sudo ./logrow $dev) -eq $((16*1024*1024)) df $dir sudo chmod a+w $dir echo abc > $dir/a mkdir $dir/b ln $dir/a $dir/b/c sz=$((32*1024*1024)) sudo strace ./logrow -s $sz $dev $img test $(sudo ./logrow $dev) -eq $sz sudo xfs_growfs $dir df $dir for i in a b/c do echo abc | diff -u - $dir/$i done dd if=/dev/zero bs=1M of=$dir/full && false ls -l $dir/full sudo umount $dir rm -fr $tmp $tmp.*sample/shwh/000077500000000000000000000000001315652647700133305ustar00rootroot00000000000000sample/shwh/README.txt000066400000000000000000000112271315652647700150310ustar00rootroot00000000000000 Maintain Aufs Branches Using SHWH mode Junjiro R. Okajima Originally aufs hides the whiteout totally and users cannot see/handle them in aufs. One user, Michael Towers suggested a new aufs mount option which makes the whiteouts visible. When I read his idea, to be honest, I was confused. Visible whiteout?? Discussing about his idea by many mails, I could understand what he wants and I implemented 'shwh' option. After he confirmed that was exactly what he wanted, he sent a document "EXAMPLE USAGE OF THE 'shwh' OPTION" to aufs-users ML. With this option and his sample, you can merge aufs branches containing whiteouts, and create a new squashfs image. Here is a little modified version. If you want to check his original version, see the url above. 
---------------------------------------------------------------------- ######################################################## # EXAMPLE USAGE OF THE 'shwh' OPTION ------ 2008.03.11 # # Michael Towers # # slightly modified by sfjro # ######################################################## The show-whiteout ('shwh') option (CONFIG_AUFS_SHWH is required) can be used to merge aufs branches containing whiteouts. This example is based on the usage in larch-5.2 (http://larch.berlios.de), a live USB-stick construction kit, based on Arch Linux. The live system has an aufs root mount comprising three layers: Bottom: 'system', squashfs (underlying base system), read-only Middle: 'mods', squashfs, read-only Top: 'overlay', ram (tmpfs), read-write The top layer is loaded at boot from a tar-lzo archive, which can also be saved at shutdown, to preserve the changes made to the system during the session. When larger changes have been made, or smaller changes have accumulated, the tar-lzo archive will have reached a size where loading and saving it take an appreciable time. At this point, it would be nice to be able to merge the two overlay branches ('mods' and 'overlay') and rewrite the 'mods' squashfs, clearing the top layer and thus restoring save and load speed. This merging is simplified by the use of another aufs mount, of just the two overlay branches using the new 'shwh' option. In larch, access to the individual branches of the root aufs is made possible by using 'mount -o bind' in the initramfs. The tmpfs is made available at /.livesys, containing mount points /.livesys/mods and /.livesys/overlay for the two overlay branches. The new, merging aufs mount will be at /.livesys/merge_union and it can be prepared using the command: # mount -t aufs \ -o ro,shwh,br:/.livesys/overlay=ro+wh:/.livesys/mods=rr+wh \ aufs /.livesys/merge_union Note that the aufs mount must be 'ro'. 
A merged view of the two overlay branches is then available at /.livesys/merge_union, and the new feature is that the whiteouts (.wh..wh..opq, etc.) are visible! [[[ Remounting is also possible, e.g. # mount -t aufs -o ro,remount,shwh,br:b1=ro+wh:b2=ro+wh aufs mp Making the whiteouts vanish again is also possible: # mount -o remount,noshwh mp ]]] It is now possible to save the combined contents of the two overlay branches to a new squashfs, e.g.: # mksquashfs /.livesys/merge_union /path/to/newmods.squash This new squashfs archive can be stored on the boot device and the initramfs will use it to replace the old one at the next boot. [[[ A new tar-lzo overlay must of course also be built, e.g. (retaining as root directory 'overlay'): # tar -cf - -C /path/to overlay | lzop > /path/to/newoverlay.tar.lzo ]]] Share and Enjoy! mt ---------------------------------------------------------------------- You may also want trying aubrsync utility in aufs2-util.git tree. For example, here is a sample script to create a new ext2fs image as a middle layer in live aufs. ---------------------------------------- #!/bin/sh AufsMntpnt=/aufs tmp=/tmp/$$ . /etc/default/aufs # initial state, you can change it anything you like. 
sudo mount -t aufs -o br:/rw:/ro none $AufsMntpnt # body sudo mount -o remount,ro,shwh $AufsMntpnt cd $AufsMntpnt sudo mount -o remount,rw ../ro dd if=/dev/zero of=$tmp.img bs=4k count=1k mkfs -t ext2 -F -q $tmp.img mkdir $tmp.br sudo mount -o rw,loop $tmp.img $tmp.br sudo mount -vo remount,ro,ins:1:$tmp.br=ro+wh $AufsMntpnt sudo aubrsync _move $AufsMntpnt ../rw ../ro \ "--remove-source-files \ --exclude=$AUFS_WH_BASE --exclude=$AUFS_WH_PLINKDIR --exclude=$AUFS_WH_ORPHDIR \ ../rw/ $tmp.br; mount -o remount,ro $tmp.br" sudo mount -o bind $tmp.br ../ro sudo umount ../ro sudo mount -o remount,del:$tmp.br $AufsMntpnt sudo umount $tmp.br ---------------------------------------- sample/uloop/000077500000000000000000000000001315652647700135155ustar00rootroot00000000000000sample/uloop/00readme.txt000066400000000000000000000144761315652647700156670ustar00rootroot00000000000000 ULOOP -- Loopback block device in userspace (and a sample for HTTP and generic block device) Junjiro Okajima 0. Introduction As you know, there is a Loopback block device in Linux, /dev/loop, which enables you to mount a fs-image local file. Also it can adopt a userspace program, such as cryptloop. This sample ULOOP driver makes it generic, and enables it to adopt any userspace program. You can give an empty or non-existing file to /dev/loop backend. When a process reads from /dev/loop, this driver wakes a user process up and passes the I/O transaction to it. A user process makes the required block ready and tells the driver. Then the driver completes the I/O transaction. There are also sample scripts and usage notes for diskless nodes working with aufs. This driver may work with it well. The name is unrelated to YouTube. :-) 1. sample for HTTP Simple 'make' will build ./drivers/block/uloop.ko and ./ulohttp. Ulohttp application behaves like losetup(8). Additionally, ulohttp is an actual daemon which handles I/O requests. Here is the syntax.
ulohttp [-b bitmap] [-c cache] device URL The device is /dev/loopN and the URL is a URL for fs-image file via HTTP. The http server must support byte range (Range: header). The bitmap is a new filename or previously specified as the bitmap for the same URL. Its filesize will be 'the size of the specified fs-image / pagesize (usually 4k) / bits in a byte (8)', rounded up to pagesize. The cache is a new filename or previously specified as the cache for the same URL. Its filesize will be 'the size of the specified fs-image', rounded up to pagesize. Note that both the bitmap and the cache are re-usable as long as you don't change the filedata and URL. When someone reads from the specified /dev/loopN, or accesses a file on a filesystem after mounting /dev/loopN, ULOOP driver first checks the corresponding bit in the bitmap file. When the bit is not set, which means the block is not retrieved yet, it passes the offset and size of the I/O request to ulohttp daemon. Ulohttp converts the offset and the size into HTTP GET request with Range header and sends it to the http server. After retrieving the data from the http server, ulohttp stores it to the cache file, and tells ULOOP driver that the HTTP transfer has completed. Then the ULOOP driver sets the corresponding bit in the bitmap, and finishes the I/O request. In other words, it is equivalent to this operation. $ wget URL_for_fsimage $ sudo mount -o loop retrieved_fsimage /mnt But ULOOP driver and ulohttp retrieve only the data (block) on-demand, and store it into the cache file. The first access to a block is slow since it involves HTTP GET, but the next access to the same block is fast since it is in the local cache file. In this case, the behaviour is equivalent to the simple /dev/loop device. o Note - ulohttp requires libcurl. - ulohttp doesn't support HTTP PUT or POST, so the device rejects WRITE operation. - ulohttp doesn't have a smart exit routine.
- This sample is "proof-of-concepts", do not expect the maturity level too much. - This driver and the sample is developed and tested on linux-2.6.21.3. - If you implement other protocols such like nbd/enbd, iscsi, aoe or something, instead of http, I guess it will be fantastic. :-) o Usage $ make $ sudo modprobe loop $ sudo insmod ./drivers/block/uloop.ko $ dev=/dev/loop7 $ ./ulohttp -b /tmp/b -c /tmp/c $dev http://whatever/you/like $ sudo mount -o ro $dev /mnt $ ls /mnt ::: $ sudo umount /mnt $ killall ulohttp $ sudo losetup -d $dev 2. sample for generic block device The sample `ulohttp' (above) retrieves data from a remote host via HTTP, and stores it into a local file as a cache. It means you can reduce the network traffic and the workload on a remote server. As you can guess easily, this scheme is also effective to a local disk device, especially when you want to make your disk and spin down/off it. Recent flash memory is getting larger and cheaper. You can cache the whole contents of your harddrive into a file on your flash. Here is a sample for it, `ulobdev.' The basic usage is very similar to `ulohttp'. See above. Of course, it is available for remote block devices too, such as nbd/enbd, iscsi and aoe. You should not mount the backend block device as readwrite, since it modifies the superblock of the filesystem on the block device even if you don't write anything to it. Currently this sample supports readonly mode only. If someone is interested in this approach and sample, I will add some features which will support read/write mode and write-back to the harddrive periodically, and discard/re-create the cache file. 3. libuloop API - int ulo_init(struct ulo_init *init); struct ulo_init { char *path[ULO_Last]; int dev_flags; unsigned long long size; }; enum {ULO_DEV, ULO_CACHE, ULO_BITMAP, ULO_Last}; Initializes ULOOP driver. All members in struct ulo_init must be set before you call ulo_init(). + path[ULO_DEV] pathname of loopback device such as "/dev/loopN". 
+ path[ULO_CACHE] pathname of a cache file. A userspace program stores the real data to this file. + path[ULO_BITMAP] pathname of a bitmap file. The ULOOP driver sets the bit which is corresponding the block number when the block is filled by a userspace program. When the bit is not set, ULOOP driver invokes the userspace program. + dev_flags Flags for open(2) of path[ULO_DEV]. + size the size of real data. the ULOOP library set this size to the cache file after creating it internally. - int ulo_loop(int sig, ulo_cb_t store, void *arg); typedef int (*ulo_cb_t)(unsigned long long start, int size, void *arg); Waits for a I/O request from ULOOP driver. When a user accesses a ULOOP device, ULOOP driver translates the request to the offset in the cache file and the requested size, and invokes the user-defined callback function which is specified by `store.' The function `store' must fill the data in the cache file following the given offset and size. You can add an argument `arg' for the callback function. - extern const struct uloop *uloop; struct uloop { int fd[ULO_Last]; int pagesize; unsigned long long tgt_size, cache_size; }; A global variable in ULOOP library. Usually you will need 'ulo_cache_fd` only. See below. #define ulo_dev_fd ({ uloop->fd[ULO_DEV]; }) #define ulo_cache_fd ({ uloop->fd[ULO_CACHE]; }) #define ulo_bitmap_fd ({ uloop->fd[ULO_BITMAP]; }) Enjoy! sample/uloop/Makefile000066400000000000000000000030421315652647700151540ustar00rootroot00000000000000 # aufs sample -- ULOOP driver # Copyright (C) 2005-2010 Junjiro Okajima # # This program, aufs is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA KDIR = /lib/modules/$(shell uname -r)/build UserLib = libuloop.a UserAp = ulohttp ulobdev Tgt= ${UserLib} ${UserAp} UloopVars = /tmp/uloopvars.mk all: drivers/block/uloop.ko ${Tgt} # the environment variables are not inherited since 2.6.23 drivers/block/uloop.ko clean: MAKE += ULOOP_EXTRA_CFLAGS=-I${CURDIR}/include drivers/block/uloop.ko clean: MAKE += -C ${KDIR} M=${CURDIR}/drivers/block CONFIG_BLK_DEV_ULOOP=m drivers/block/uloop.ko: ${MAKE} modules clean: ${MAKE} $@ ${RM} ${Tgt} ${UloopVars} *.o *~ UserCFLAGS = -g -Wall -D_FILE_OFFSET_BITS=64 -DNDEBUG -I${CURDIR}/include -UNDEBUG libuloop.a: CFLAGS=${UserCFLAGS} libuloop.a: libuloop.a(libuloop.o) ${UserAp}: CFLAGS += ${UserCFLAGS} ${UserAp}: LDLIBS += -L${CURDIR} -luloop ulohttp: LDLIBS += -lcurl -include priv.mk sample/uloop/drivers/000077500000000000000000000000001315652647700151735ustar00rootroot00000000000000sample/uloop/drivers/block/000077500000000000000000000000001315652647700162655ustar00rootroot00000000000000sample/uloop/drivers/block/Makefile000066400000000000000000000002511315652647700177230ustar00rootroot00000000000000 # the environment variables are not inherited since 2.6.23 ifdef ULOOP_EXTRA_CFLAGS EXTRA_CFLAGS += ${ULOOP_EXTRA_CFLAGS} endif obj-$(CONFIG_BLK_DEV_ULOOP) += uloop.o sample/uloop/drivers/block/uloop.c000066400000000000000000000332241315652647700175730ustar00rootroot00000000000000/* * aufs sample -- ULOOP driver * * Copyright (C) 2005-2010 Junjiro Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under 
the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include /* ---------------------------------------------------------------------- */ /* in struct loop_device */ #define private_data key_data struct ulo_queue { spinlock_t spin; struct list_head head; wait_queue_head_t wq; }; struct ulo_qelem { struct list_head list; union ulo_ctl ctl; }; enum {UloQ_READY, UloQ_RCVREQ, UloQ_SNDRES, UloQ_Last}; struct ulo_dev { struct ulo_queue queue[UloQ_Last]; struct file *bmp; struct mutex bmpmtx; unsigned long bmpidx; unsigned long *bmpbuf; unsigned long long bmpsz; }; static struct kmem_cache *ulo_cache; #define UloMsg(level, fmt, args...) \ printk(level ULOOP_NAME ":%s[%d]:%s:%d: " fmt, \ current->comm, current->pid, __func__, __LINE__, ##args) #define UloErr(fmt, args...) UloMsg(KERN_ERR, fmt, ##args) #if 1 #define UloDebugOn(c) BUG_ON(!!(c)) #define UloDbg(fmt, args...) UloMsg(KERN_DEBUG, fmt, ##args) #define UloDbgErr(e) if (e) UloDbg("err %d\n", e) #define UloDbg1(fmt, args...) do { \ static unsigned char c; \ if (!c++) \ UloDbg(fmt, ##args); \ } while (0); #else #define UloDebugOn(c) do {} while(0) #define UloDbg(fmt, args...) do {} while(0) #define UloDbgErr(e) do {} while(0) #define UloDbg1(fmt, args...) 
do {} while(0) #endif /* ---------------------------------------------------------------------- */ static void ulo_append(struct ulo_dev *dev, int qindex, struct ulo_qelem *qelem) { struct ulo_queue *queue; spinlock_t *spin; queue = dev->queue + qindex; spin = &queue->spin; spin_lock(spin); list_add_tail(&qelem->list, &queue->head); spin_unlock(spin); #if 1 if (1 || qindex == UloQ_READY) wake_up(&queue->wq); #if 0 else wake_up_all(&queue->wq); #endif #else wake_up_all(&queue->wq); #endif } static int ulo_queue_lock_nonempty(struct ulo_queue *queue) //__acquires(QueueSpin) { int empty; spin_lock(&queue->spin); empty = list_empty(&queue->head); if (empty) spin_unlock(&queue->spin); return !empty; } static struct ulo_qelem *ulo_wait(struct ulo_dev *dev, int qindex) //__releases(QueueSpin) { struct ulo_qelem *qelem; struct ulo_queue *queue; int err; spinlock_t *spin; qelem = NULL; queue = dev->queue + qindex; spin = &queue->spin; while (!qelem) { err = wait_event_interruptible (queue->wq, ulo_queue_lock_nonempty(queue)); if (unlikely(err)) { qelem = ERR_PTR(err); break; } qelem = list_entry(queue->head.next, struct ulo_qelem, list); list_del(&qelem->list); spin_unlock(spin); } return qelem; } /* ---------------------------------------------------------------------- */ static int ulo_write(struct file *file, void *buf, size_t sz, loff_t *ppos) { int err; ssize_t ret; mm_segment_t oldfs; err = 0; oldfs = get_fs(); set_fs(KERNEL_DS); ret = vfs_write(file, (const char __user *)buf, sz, ppos); set_fs(oldfs); if (unlikely(ret != sz)) { err = ret; if (ret <= sz) { UloDbg("ret %ld, sz %lu\n", (long)ret, (unsigned long)sz); err = -EIO; } } return err; } static int ulo_bmp_pindex(struct ulo_dev *dev, unsigned long pindex) { int err, e2; loff_t pos; struct file *bmp; void *bmpbuf; UloDebugOn(!mutex_is_locked(&dev->bmpmtx)); if (dev->bmpidx == pindex) return 0; err = -EIO; if (unlikely(dev->bmpsz / PAGE_SIZE < pindex)) goto out; bmp = dev->bmp; bmpbuf = dev->bmpbuf; pos = 
dev->bmpidx; pos *= PAGE_SIZE; err = ulo_write(bmp, bmpbuf, PAGE_SIZE, &pos); if (unlikely(err)) goto out; UloDebugOn(dev->bmpsz != i_size_read(bmp->f_dentry->d_inode)); pos = pindex; pos *= PAGE_SIZE; UloDebugOn(dev->bmpsz < pos + PAGE_SIZE); err = kernel_read(bmp, pos, bmpbuf, PAGE_SIZE); if (unlikely(err != PAGE_SIZE)) { if (0 <= err) { UloDbg("%d\n", err); err = -EIO; } /* restore */ e2 = kernel_read(bmp, pos - PAGE_SIZE, bmpbuf, PAGE_SIZE); if (e2 != PAGE_SIZE) { UloDbg("%d\n", e2); err = -EIO; } goto out; } err = 0; dev->bmpidx = pindex; out: UloDbgErr(err); return err; } /* ---------------------------------------------------------------------- */ static int ulo_xfer_begin(struct ulo_dev *dev, unsigned long long start, int size) { int err; struct ulo_qelem *qelem; struct ulo_queue *queue; struct ulo_ctl_ready ready; spinlock_t *spin; BUILD_BUG_ON(sizeof(qelem->ctl.rcvreq.start) != sizeof(start) || sizeof(qelem->ctl.rcvreq.size) != sizeof(size)); //UloDbg("start %Lu, size %d\n", start, size); qelem = ulo_wait(dev, UloQ_READY); err = PTR_ERR(qelem); if (IS_ERR(qelem)) goto out; ready = qelem->ctl.ready; qelem->ctl.rcvreq.start = start; qelem->ctl.rcvreq.size = size; queue = dev->queue + UloQ_RCVREQ; spin = &queue->spin; spin_lock(spin); list_add_tail(&qelem->list, &queue->head); spin_unlock(spin); /* wake up the user process */ err = kill_pid(ready.pid, ready.signum, 0); out: UloDbgErr(err); return err; } static int ulo_xfer_end(struct ulo_dev *dev, unsigned long long start, int size, unsigned long pindex, int bit) { int err; struct ulo_qelem *qelem; union ulo_ctl ctl; struct mutex *mtx; BUILD_BUG_ON(sizeof(ctl.sndres.start) != sizeof(start) || sizeof(ctl.sndres.size) != sizeof(size)); #if 0 UloDbg("start %Lu, size %d, pindex %lu, bit %d\n", start, size, pindex, bit); #endif while (1) { qelem = ulo_wait(dev, UloQ_SNDRES); if (IS_ERR(qelem)) return PTR_ERR(qelem); if (qelem->ctl.sndres.start == start && qelem->ctl.sndres.size == size) break; /* this is not what 
I want. return it */
		ulo_append(dev, UloQ_SNDRES, qelem);
	}

	/* set bitmap */
	mtx = &dev->bmpmtx;
	mutex_lock(mtx);
	err = ulo_bmp_pindex(dev, pindex);
	if (!err)
		set_bit(bit, dev->bmpbuf);
	mutex_unlock(mtx);
	/* the SNDRES element is consumed here */
	kmem_cache_free(ulo_cache, qelem);

	UloDbgErr(err);
	return err;
}

/*
 * The transfer function registered in uloop_ops.
 * For every PAGE_SIZE step of the request, test the matching bit in the
 * bitmap file; when it is clear, ask userspace to fill that part
 * (ulo_xfer_begin()/ulo_xfer_end()). Finally copy @size bytes from
 * @raw_page + @raw_off to @loop_page + @loop_off.
 * @cmd is not examined (the READ-only check is compiled out).
 * Returns 0, -ESRCH when no bitmap has been set, or a negative error from
 * the helpers.
 */
static int uloop_xfer(struct loop_device *lo, int cmd,
		      struct page *raw_page, unsigned int raw_off,
		      struct page *loop_page, unsigned int loop_off,
		      int size, sector_t real_block)
{
	int err, set, bit, sz;
	struct ulo_dev *dev;
	char *raw_buf, *loop_buf;
	loff_t pos, loff;
	unsigned long pindex;
	struct mutex *mtx;
	/* the number of bits in one page of the bitmap */
	const unsigned long bmp_page_bytes = PAGE_SIZE * BITS_PER_BYTE;

#if 0
	UloDbg("raw_off %u, loop_off %u, sz %d, real_block %lu\n",
	       raw_off, loop_off, size, real_block);
#endif

#if 0
	err = -EACCES;
	if (unlikely(cmd != READ))
		goto out;
#endif

	err = -ESRCH;
	dev = lo->private_data;
	if (unlikely(!dev || !dev->bmp))
		goto out;

	/* byte offset of the request, @real_block counts 512-byte sectors */
	pos = real_block;
	//pos *= KERNEL_SECTOR_SIZE;
	pos *= 512;
	//pos += loop_off;
#if 0
	UloDbg("pos %Lu, raw_off %u, loop_off %u, sz %d, real_block %lu\n",
	       pos, raw_off, loop_off, size, real_block);
#endif

	// todo: optimize (or make intelligent) this loop
	err = 0;
	sz = size;
	mtx = &dev->bmpmtx;
	while (sz > 0) {
		/* test bitmap */
		set = 1;
		loff = pos / PAGE_SIZE;			/* page number in the cache */
		pindex = loff / bmp_page_bytes;		/* page number in the bitmap */
		bit = loff % bmp_page_bytes;		/* bit number in that page */
		//Dbg("pindex %lu, bit %d\n", pindex, bit);
		mutex_lock(mtx);
		err = ulo_bmp_pindex(dev, pindex);
		if (!err)
			set = test_bit(bit, dev->bmpbuf);
		mutex_unlock(mtx);
		if (unlikely(err))
			goto out;

		/* xfer by userspace */
		if (!set) {
			err = ulo_xfer_begin(dev, pos, PAGE_SIZE);
			if (!err)
				err = ulo_xfer_end(dev, pos, PAGE_SIZE,
						   pindex, bit);
			if (unlikely(err))
				goto out;
		}

		sz -= PAGE_SIZE;
		pos += PAGE_SIZE;
	}

	/* satisfy the request */
	if (!err) {
		raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
		loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
		memcpy(loop_buf, raw_buf, size);
		kunmap_atomic(raw_buf, KM_USER0);
		kunmap_atomic(loop_buf, KM_USER1);
		cond_resched();
	}

out:
UloDbgErr(err);
	return err;
}

/* ---------------------------------------------------------------------- */

/*
 * ULOCTL_SETBMP: attach the bitmap file given by ctl.setbmp.fd to @dev and
 * load its first page.
 * Returns 0, -EBUSY when a bitmap is already attached, -EFAULT, -EINVAL
 * (bad fd or page size), -EBADF (not opened read/write, or a size which is
 * zero or not a multiple of PAGE_SIZE), -ENOMEM or a read error.
 */
static int ulo_ctl_setbmp(struct ulo_dev *dev, union ulo_ctl __user *uarg)
{
	struct file *bmp;
	const mode_t rw = (FMODE_READ | FMODE_WRITE);
	int err;
	union ulo_ctl ctl;

	if (unlikely(dev->bmp))
		return -EBUSY;
	if (unlikely((copy_from_user(&ctl, uarg, sizeof(ctl)))))
		return -EFAULT;

	/* no reference is held when fget() fails, so fput() must be skipped */
	bmp = fget(ctl.setbmp.fd);
	if (unlikely(!bmp || IS_ERR(bmp)))
		return -EINVAL;

	err = -EINVAL;
	if (unlikely(ctl.setbmp.pagesize != PAGE_SIZE))
		goto out;

	err = -EBADF;
	if (unlikely((bmp->f_mode & rw) != rw))
		goto out;
	dev->bmpsz = i_size_read(bmp->f_dentry->d_inode);
	if (unlikely(!dev->bmpsz || dev->bmpsz % PAGE_SIZE))
		goto out;

	err = -ENOMEM;
	dev->bmpbuf = (void *)__get_free_page(GFP_KERNEL);
	if (unlikely(!dev->bmpbuf))
		goto out;

	dev->bmp = bmp;
	dev->bmpidx = 0;
	err = kernel_read(dev->bmp, 0, (void *)dev->bmpbuf, PAGE_SIZE);
	if (err == PAGE_SIZE)
		return 0; /* success, the file reference is kept in dev->bmp */

	/* error */
	if (0 <= err)
		err = -EIO;
	dev->bmp = NULL;
	free_page((unsigned long)dev->bmpbuf);
	dev->bmpbuf = NULL;

out:
	fput(bmp);
	return err;
}

/*
 * ULOCTL_READY and ULOCTL_SNDRES: copy the control block from userspace into
 * a new element and append it to queue @qindex.
 * Returns 0, -ENOMEM or -EFAULT.
 */
static int ulo_ctl_queue(struct ulo_dev *dev, int qindex,
			 union ulo_ctl __user *uarg)
{
	struct ulo_qelem *qelem;

	/*
	 * this element will be freed by ulo_ctl_rcvreq() or ulo_xfer_end().
	 * kmem_cache_alloc() returns NULL on failure, not an ERR_PTR.
	 */
	qelem = kmem_cache_alloc(ulo_cache, GFP_KERNEL);
	if (unlikely(!qelem))
		return -ENOMEM;
	if (unlikely(copy_from_user(&qelem->ctl, uarg, sizeof(*uarg)))) {
		kmem_cache_free(ulo_cache, qelem);
		return -EFAULT;
	}

	/* the pid given by userspace is ignored, the caller's own one is used */
	/* NOTE(review): no reference is taken on this struct pid -- confirm its lifetime */
	if (qindex == UloQ_READY)
		qelem->ctl.ready.pid = task_pid(current);
	ulo_append(dev, qindex, qelem);
	return 0;
}

/*
 * ULOCTL_RCVREQ: detach the first pending request (without sleeping) and
 * copy it to userspace.
 * Returns 0, -ENXIO when no request is pending, or -EFAULT.
 */
static int ulo_ctl_rcvreq(struct ulo_dev *dev, union ulo_ctl __user *uarg)
{
	struct ulo_queue *queue;
	struct ulo_qelem *qelem;
	spinlock_t *spin;

	//Dbg("rcvreq\n");
	queue = dev->queue + UloQ_RCVREQ;
	qelem = NULL;
	spin = &queue->spin;
	spin_lock(spin);
	if (!list_empty(&queue->head)) {
		qelem = list_entry(queue->head.next, struct ulo_qelem, list);
		list_del(&qelem->list);
	}
	spin_unlock(spin);
	if (unlikely(!qelem))
		return -ENXIO;
if (!copy_to_user(uarg, &qelem->ctl, sizeof(*uarg))) { kmem_cache_free(ulo_cache, qelem); return 0; } /* error */ ulo_append(dev, UloQ_RCVREQ, qelem); return -EFAULT; } static int uloop_ioctl(struct loop_device *loop, int cmd, unsigned long _uarg) { int err; union ulo_ctl __user *uarg; struct ulo_dev *dev; uarg = (__user void *)_uarg; dev = loop->private_data; switch (cmd) { case ULOCTL_SETBMP: err = ulo_ctl_setbmp(dev, uarg); break; case ULOCTL_READY: err = ulo_ctl_queue(dev, UloQ_READY, uarg); break; case ULOCTL_RCVREQ: err = ulo_ctl_rcvreq(dev, uarg); break; case ULOCTL_SNDRES: err = ulo_ctl_queue(dev, UloQ_SNDRES, uarg); break; default: err = -EINVAL; } return err; } /* ---------------------------------------------------------------------- */ /* * ioctl LOOP_SET_STATUS and LOOP_CLR_FD */ static int uloop_release(struct loop_device *loop) { int err, i; struct ulo_dev *dev; struct ulo_queue *queue; struct ulo_qelem *qelem, *tmp; loff_t pos; dev = loop->private_data; if (dev->bmpbuf) { pos = dev->bmpidx; pos *= PAGE_SIZE; err = ulo_write(dev->bmp, dev->bmpbuf, PAGE_SIZE, &pos); free_page((unsigned long)dev->bmpbuf); if (unlikely(err)) UloErr("bitmap write failed (%d), ignored\n", err); } if (dev->bmp) fput(dev->bmp); for (i = 0; i < UloQ_Last; i++) { queue = dev->queue + i; spin_lock(&queue->spin); list_for_each_entry_safe(qelem, tmp, &queue->head, list) { list_del(&qelem->list); kmem_cache_free(ulo_cache, qelem); } spin_unlock(&queue->spin); } kfree(dev); loop->private_data = NULL; return 0; } /* ioctl LOOP_SET_STATUS */ static int uloop_dev_init(struct loop_device *loop, const struct loop_info64 *info) { struct file *file; struct ulo_dev *dev; int i; struct ulo_queue *queue; file = loop->lo_backing_file; if (unlikely(!file || i_size_read(file->f_dentry->d_inode) % PAGE_SIZE)) return -EBADF; loop->private_data = dev = kmalloc(sizeof(*dev), GFP_KERNEL); if (unlikely(!dev)) return -ENOMEM; for (i = 0; i < UloQ_Last; i++) { queue = dev->queue + i; 
spin_lock_init(&queue->spin); INIT_LIST_HEAD(&queue->head); init_waitqueue_head(&queue->wq); } dev->bmp = NULL; mutex_init(&dev->bmpmtx); dev->bmpbuf = NULL; return 0; } /* ---------------------------------------------------------------------- */ static struct loop_func_table uloop_ops = { .number = LOOP_FILTER_ULOOP, .release = uloop_release, .init = uloop_dev_init, .transfer = uloop_xfer, .ioctl = uloop_ioctl, //.owner = THIS_MODULE }; static int __init uloop_mod_init(void) { int err; ulo_cache = kmem_cache_create(ULOOP_NAME, sizeof(struct ulo_qelem), 0, SLAB_RECLAIM_ACCOUNT, #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23) NULL, #endif NULL); if (!ulo_cache) return -ENOMEM; err = loop_register_transfer(&uloop_ops); if (!err) printk(KERN_INFO ULOOP_NAME " " ULOOP_VERSION "\n"); else kmem_cache_destroy(ulo_cache); return err; } static void __exit uloop_mod_exit(void) { loop_unregister_transfer(uloop_ops.number); kmem_cache_destroy(ulo_cache); } module_init(uloop_mod_init); module_exit(uloop_mod_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Junjiro Okajima"); MODULE_DESCRIPTION(ULOOP_NAME " -- Userspace Loopback Block Device"); MODULE_VERSION(ULOOP_VERSION); sample/uloop/include/000077500000000000000000000000001315652647700151405ustar00rootroot00000000000000sample/uloop/include/linux/000077500000000000000000000000001315652647700162775ustar00rootroot00000000000000sample/uloop/include/linux/uloop.h000066400000000000000000000054301315652647700176100ustar00rootroot00000000000000/* * aufs sample -- ULOOP driver * * Copyright (C) 2005-2010 Junjiro Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
*
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef __ULOOP_H__
#define __ULOOP_H__

/* NOTE(review): the header names of the #include lines are missing in this copy */
#include
#include
//#include
#ifndef __KERNEL__
#include
#endif

#define ULOOP_NAME "uloop"
#define ULOOP_VERSION "20071126"

/* loop filter variation */
#define LOOP_FILTER_ULOOP (MAX_LO_CRYPT - 1)

/* ioctl */
#ifndef LOOP_CHANGE_FD
#define LOOP_CHANGE_FD 0x4C06
#endif

enum {UloCtlErr, UloCtlErr_Last};

/* ioctl command numbers, they follow the low byte of LOOP_CHANGE_FD */
enum {
	/* LOOP_CHANGE_FD is the last number in loop ioctl */
	UloCtl_Begin = (LOOP_CHANGE_FD & 0x0ff),
	UloCtl_SETBMP,
	UloCtl_READY,
	UloCtl_RCVREQ,
	UloCtl_SNDRES
};

/* ULOCTL_SETBMP: the bitmap file and the page size the caller assumes */
struct ulo_ctl_setbmp {
	int fd;
	int pagesize;
};

/* ULOCTL_READY: the signal which tells the caller that a request is pending */
struct ulo_ctl_ready {
	int signum;
	struct pid *pid; /* the driver sets it automatically */
};

/* ULOCTL_RCVREQ: the range requested by the driver */
struct ulo_ctl_rcvreq {
	unsigned long long start;
	int size;
};

/* ULOCTL_SNDRES: the range which has been stored by the caller */
struct ulo_ctl_sndres {
	unsigned long long start;
	int size;
};

/* the argument of all ULOCTL_* ioctls */
union ulo_ctl {
	struct ulo_ctl_setbmp setbmp;
	struct ulo_ctl_ready ready;
	struct ulo_ctl_rcvreq rcvreq;
	struct ulo_ctl_sndres sndres;
};

#define ULOCTL_Type 'L'
#define ULOCTL_SETBMP _IOW(ULOCTL_Type, UloCtl_SETBMP, union ulo_ctl)
#define ULOCTL_READY _IOR(ULOCTL_Type, UloCtl_READY, union ulo_ctl)
#define ULOCTL_RCVREQ _IOR(ULOCTL_Type, UloCtl_RCVREQ, union ulo_ctl)
#define ULOCTL_SNDRES _IOW(ULOCTL_Type, UloCtl_SNDRES, union ulo_ctl)

/* ---------------------------------------------------------------------- */

/* user library API */
#ifndef __KERNEL__
/* indices of uloop->fd[] and ulo_init->path[] */
enum {ULO_DEV, ULO_CACHE, ULO_BITMAP, ULO_Last};

/* the state of the library, filled by ulo_init() */
struct uloop {
	int fd[ULO_Last];
	int pagesize;
	unsigned long long tgt_size, cache_size;
};

extern const struct uloop *uloop;
#define ulo_dev_fd ({
uloop->fd[ULO_DEV]; })
#define ulo_cache_fd ({ uloop->fd[ULO_CACHE]; })
#define ulo_bitmap_fd ({ uloop->fd[ULO_BITMAP]; })

/* the argument of ulo_init() */
struct ulo_init {
	char *path[ULO_Last];		/* indexed by ULO_DEV, ULO_CACHE and ULO_BITMAP */
	int dev_flags;			/* open(2) flags for path[ULO_DEV] */
	unsigned long long size;	/* the size of the target in bytes */
};

int ulo_init(struct ulo_init *init);
/* called by ulo_loop() for every request, non-zero stops the loop */
typedef int (*ulo_cb_t)(unsigned long long start, int size, void *arg);
int ulo_loop(int sig, ulo_cb_t store, void *arg);
#endif

#endif /* __ULOOP_H__ */
sample/uloop/libuloop.c000066400000000000000000000120311315652647700155030ustar00rootroot00000000000000
/*
 * aufs sample -- ULOOP driver
 *
 * Copyright (C) 2005-2010 Junjiro Okajima
 *
 * This program, aufs is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

/* NOTE(review): the header names of the #include lines are missing in this copy */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* the library state, callers get a read-only view through 'uloop' */
static struct uloop g_uloop;
const struct uloop *uloop = &g_uloop;

/* ---------------------------------------------------------------------- */

#define Dbg(fmt, args...)
fprintf(stderr, "%s:%d:" fmt, \ __func__, __LINE__, ##args) #define DbgErr(e) if (e) Dbg("err %d\n", e) /* ---------------------------------------------------------------------- */ int ulo_loop(int sig, ulo_cb_t store, void *arg) { int err; static sigset_t sigset; union ulo_ctl ctl; err = sigemptyset(&sigset); if (!err) err = sigaddset(&sigset, sig); if (!err) err = sigprocmask(SIG_BLOCK, &sigset, NULL); while (!err) { ctl.ready.signum = sig; //Dbg("ready\n"); err = ioctl(g_uloop.fd[ULO_DEV], ULOCTL_READY, &ctl); DbgErr(err); if (!err) err = sigwaitinfo(&sigset, NULL); //DbgErr(err); if (err == sig) err = ioctl(g_uloop.fd[ULO_DEV], ULOCTL_RCVREQ, &ctl); DbgErr(err); if (!err) err = store(ctl.rcvreq.start, ctl.rcvreq.size, arg); if (!err) { ctl.sndres.start = ctl.rcvreq.start; ctl.sndres.size = ctl.rcvreq.size; err = ioctl(g_uloop.fd[ULO_DEV], ULOCTL_SNDRES, &ctl); DbgErr(err); } } return err; } /* ---------------------------------------------------------------------- */ static int ulo_create_size(char *path, unsigned long long size) { int fd, err; off_t off; ssize_t sz; struct stat st; err = 0; st.st_size = 0; fd = open(path, O_RDWR | O_CREAT, 0644); if (fd < 0) return fd; err = fstat(fd, &st); if (err) return err; if (st.st_size == size) return fd; /* success */ off = lseek(fd, size - 1, SEEK_SET); if (off == -1) return -1; sz = write(fd, "\0", 1); if (sz != 1) return -1; return fd; /* success */ } static int ulo_init_loop(char *dev_path, int dev_flags, char *cache_path) { int err; struct loop_info64 loinfo64; union ulo_ctl ctl; err = open(dev_path, dev_flags); if (err < 0) goto out; g_uloop.fd[ULO_DEV] = err; err = ioctl(g_uloop.fd[ULO_DEV], LOOP_SET_FD, g_uloop.fd[ULO_CACHE]); if (err) goto out; memset(&loinfo64, 0, sizeof(loinfo64)); strncpy((void *)(loinfo64.lo_file_name), cache_path, LO_NAME_SIZE); loinfo64.lo_encrypt_type = LOOP_FILTER_ULOOP; //strncpy((void *)(loinfo64.lo_crypt_name), "ulttp", LO_NAME_SIZE); //loinfo64.lo_sizelimit = cache_size; err = 
ioctl(g_uloop.fd[ULO_DEV], LOOP_SET_STATUS64, &loinfo64); if (err) goto out_loop; ctl.setbmp.fd = g_uloop.fd[ULO_BITMAP]; ctl.setbmp.pagesize = g_uloop.pagesize; err = ioctl(g_uloop.fd[ULO_DEV], ULOCTL_SETBMP, &ctl); if (!err) { #if 0 Dbg("{%d, %d, %d}, pgae %d, tgt %Lu, cache %Lu\n", uloop->fd[0], uloop->fd[1], uloop->fd[2], uloop->pagesize, uloop->tgt_size, uloop->cache_size); #endif return 0; } DbgErr(err); out_loop: ioctl(g_uloop.fd[ULO_DEV], LOOP_CLR_FD, g_uloop.fd[ULO_CACHE]); out: return err; } int ulo_init(struct ulo_init *init) { int err; unsigned long long mod, sz; #if 0 err = EINVAL; int i; for (i = 0; i < ULO_Last; i++) if (!init->path[i]) goto out; if (init->size == -1) goto out; #endif g_uloop.cache_size = init->size; g_uloop.tgt_size = init->size; g_uloop.pagesize = sysconf(_SC_PAGESIZE); assert(g_uloop.pagesize > 0); err = EINVAL; mod = g_uloop.cache_size % g_uloop.pagesize; if (mod) g_uloop.cache_size += g_uloop.pagesize - mod; if (g_uloop.cache_size % g_uloop.pagesize) goto out; g_uloop.fd[ULO_CACHE] = ulo_create_size(init->path[ULO_CACHE], g_uloop.cache_size); err = g_uloop.fd[ULO_CACHE]; if (g_uloop.fd[ULO_CACHE] < 0) goto out; sz = g_uloop.cache_size; sz /= g_uloop.pagesize; sz /= CHAR_BIT; if (sz < g_uloop.pagesize) sz = g_uloop.pagesize; else { mod = sz % g_uloop.pagesize; if (mod) sz += g_uloop.pagesize - mod; } err = EINVAL; if (sz % g_uloop.pagesize) goto out; g_uloop.fd[ULO_BITMAP] = ulo_create_size(init->path[ULO_BITMAP], sz); err = g_uloop.fd[ULO_BITMAP]; if (g_uloop.fd[ULO_BITMAP] < 0) goto out; err = ulo_init_loop(init->path[ULO_DEV], init->dev_flags, init->path[ULO_CACHE]); if (!err) return 0; out: return err; } sample/uloop/ulobdev.c000066400000000000000000000103251315652647700153220ustar00rootroot00000000000000/* * aufs sample -- ULOOP driver * * Copyright (C) 2005-2010 Junjiro Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published 
by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include static int real_fd; static char *me, *real_bdev; #define Dbg(fmt, args...) printf("%s:%d:" fmt, __func__, __LINE__, ##args) #define DbgErr(e) if (e) Dbg("err %d\n", e) /* ---------------------------------------------------------------------- */ static int store(unsigned long long start, int size, void *arg) { int err; unsigned long long m, tsize; char *src, *dst; //Dbg("start %Lu, size %d\n", start, size); assert(start + size <= uloop->cache_size); err = -1; m = start % uloop->pagesize; start -= m; size += m; tsize = uloop->tgt_size; if (tsize < start + size) size = tsize - start; src = mmap(NULL, size, PROT_READ, MAP_SHARED, real_fd, start); if (src == MAP_FAILED) goto out; dst = mmap(NULL, size, PROT_WRITE, MAP_SHARED, ulo_cache_fd, start); if (dst == MAP_FAILED) goto out_src; memcpy(dst, src, size); #if 0 err = msync(dst, size, MS_SYNC); DbgErr(err); #endif err = munmap(dst, size); out_src: munmap(src, size); /* ignore */ out: DbgErr(err); return err; } /* ---------------------------------------------------------------------- */ static unsigned long long init_size(void) { unsigned long long sz; int err, i; sz = -1; real_fd = open(real_bdev, O_RDONLY); if (real_fd < 0) goto out_err; err = ioctl(real_fd, BLKGETSIZE, &i); if (err) { close(real_fd); goto out_err; } sz = i; sz *= 512; goto out; /* 
success */ out_err: me = real_bdev; out: return sz; } /* ---------------------------------------------------------------------- */ static int init(struct ulo_init *init_args) { int err; init_args->size = init_size(); err = init_args->size; if (err != -1) err = ulo_init(init_args); me = real_bdev; return err; } static void usage(void) { fprintf(stderr, "%s" " [-b bitmap]" " [-c cache]" " /dev/loopN block_device\n", me); exit(EINVAL); } static int parse(int argc, char *argv[], struct ulo_init *init_args) { int opt; static char bitmap_def[] = "/tmp/123456.bitmap", cache_def[] = "/tmp/123456.cache"; while ((opt = getopt(argc, argv, "b:c:")) != -1) { switch (opt) { case 'b': init_args->path[ULO_BITMAP] = optarg; break; case 'c': init_args->path[ULO_CACHE] = optarg; break; default: usage(); break; } } if (argc - optind != 2) { usage(); return EINVAL; } init_args->path[ULO_DEV] = argv[optind]; real_bdev = argv[optind + 1]; if (!init_args->path[ULO_BITMAP]) { snprintf(bitmap_def, sizeof(bitmap_def) - 1, "/tmp/%d.bitmap", getpid()); init_args->path[ULO_BITMAP] = bitmap_def; } if (!init_args->path[ULO_CACHE]) { snprintf(cache_def, sizeof(cache_def) - 1, "/tmp/%d.cache", getpid()); init_args->path[ULO_CACHE] = cache_def; } //Dbg("to %d, %s\n", timeout, real_bdev); return 0; } /* ---------------------------------------------------------------------- */ int main(int argc, char *argv[]) { int err; pid_t pid; struct ulo_init init_args = { .dev_flags = O_RDONLY //.dev_flags = O_RDWR }; me = argv[0]; err = parse(argc, argv, &init_args); if (!err) err = init(&init_args); if (!err) { pid = fork(); if (!pid) err = ulo_loop(SIGUSR1, store, NULL); else if (pid > 0) sleep(1); } if (err && me) perror(me); return err; } sample/uloop/ulohttp.c000066400000000000000000000166451315652647700153740ustar00rootroot00000000000000/* * aufs sample -- ULOOP driver * * Copyright (C) 2005-2010 Junjiro Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the 
terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include static int timeout = 30; static char *me, *g_url; static CURL *ezcurl; static char range[32]; static struct arg_for_curl { char *p; int written; int size; int err; } arg_for_curl; #define Dbg(fmt, args...) printf("%s:%d:" fmt, __func__, __LINE__, ##args) #define DbgErr(e) if (e) Dbg("err %d\n", e) /* ---------------------------------------------------------------------- */ static int err_curl(CURLcode curle) { int e; e = errno; fprintf(stderr, "%s: %s\n", me, curl_easy_strerror(curle)); me = NULL; return e; } static size_t store_from_curl(void *got, size_t size, size_t nmemb, void *arg) { int real_bytes; #if 0 Dbg("size %u, nmemb %u, arg_for_curl->err %d\n", size, nmemb, arg_for_curl.err); #endif if (!size || !nmemb || arg_for_curl.err) return 0; real_bytes = size * nmemb; if (arg_for_curl.size < arg_for_curl.written + real_bytes) { arg_for_curl.err++; return 0; } memcpy(arg_for_curl.p, got, real_bytes); arg_for_curl.written += real_bytes; arg_for_curl.p += real_bytes; return nmemb; } static int store(unsigned long long start, int size, void *arg) { CURL *ezcurl = arg; CURLcode curle; int err; unsigned long long m, tsize; char *o; //Dbg("start %Lu, size %d\n", start, size); assert(start + size <= uloop->cache_size); m = start % uloop->pagesize; start -= m; 
arg_for_curl.size = size + m; tsize = uloop->tgt_size; if (tsize < start + arg_for_curl.size) arg_for_curl.size = tsize - start; o = mmap(NULL, arg_for_curl.size, PROT_WRITE, MAP_SHARED, ulo_cache_fd, start); if (o == MAP_FAILED) return -1; arg_for_curl.p = o; arg_for_curl.written = 0; arg_for_curl.err = 0; snprintf(range, sizeof(range) - 1, "%Lu-%Lu", start, start + arg_for_curl.size - 1); curle = curl_easy_perform(ezcurl); err = munmap(o, arg_for_curl.size); if (err) return err; if (curle != CURLE_OK) return err_curl(curle); if (arg_for_curl.written != arg_for_curl.size) return -1; return 0; } /* ---------------------------------------------------------------------- */ static unsigned long long get_size(void) { unsigned long long size; CURLcode curle; char *header, *p; const int hsz = 1024; size = ULONG_MAX; /* error */ header = malloc(hsz); if (!header) return size; arg_for_curl.p = header; arg_for_curl.size = hsz; arg_for_curl.written = 0; arg_for_curl.err = 0; curle = curl_easy_setopt(ezcurl, CURLOPT_HEADERFUNCTION, store_from_curl); if (curle == CURLE_OK) curle = curl_easy_setopt(ezcurl, CURLOPT_RANGE, "0-1"); #if 0 if (curle == CURLE_OK) curle = curl_easy_setopt(ezcurl, CURLOPT_WRITEHEADER, &arg_for_curl); #endif if (curle == CURLE_OK) curle = curl_easy_perform(ezcurl); if (curle != CURLE_OK) { err_curl(curle); return size; } if (arg_for_curl.err) { fprintf(stderr, "%s: internal error.\n", me); errno = EINVAL; return size; } //Dbg("%s\n", header); p = strstr(header, "Content-Range: bytes "); if (p) p = strchr(p, '/'); if (!p) { fprintf(stderr, "%s: no range header, %s\n", me, g_url); errno = EINVAL; return size; } size = strtoull(p + 1, NULL, 10); free(header); /* reset */ curle = curl_easy_setopt(ezcurl, CURLOPT_HEADERFUNCTION, NULL); if (curle == CURLE_OK) curle = curl_easy_setopt(ezcurl, CURLOPT_RANGE, NULL); if (curle == CURLE_OK) curle = curl_easy_setopt(ezcurl, CURLOPT_WRITEHEADER, NULL); if (curle == CURLE_OK) return size; /* success */ 
err_curl(curle); return ULONG_MAX; /* error */ } static unsigned long long init_curl_and_size(void) { unsigned long long sz; CURLcode curle; sz = -1; errno = ENOMEM; ezcurl = curl_easy_init(); if (!ezcurl) return -1; curle = curl_easy_setopt(ezcurl, CURLOPT_URL, g_url); if (curle == CURLE_OK) curle = curl_easy_setopt(ezcurl, CURLOPT_TIMEOUT, timeout); #if 0 if (curle == CURLE_OK) curle = curl_easy_setopt(ezcurl, CURLOPT_VERBOSE, 1); #endif #if 0 if (curle == CURLE_OK) curle = curl_easy_setopt(ezcurl, CURLOPT_NOPROGRESS, 1); if (curle == CURLE_OK) curle = curl_easy_setopt(ezcurl, CURLOPT_FAILONERROR, 1); #endif if (curle != CURLE_OK) goto out_curl; errno = ERANGE; sz = get_size(); if (sz == ULONG_MAX) goto out_curl; curle = curl_easy_setopt(ezcurl, CURLOPT_WRITEFUNCTION, store_from_curl); #if 0 if (curle == CURLE_OK) curle = curl_easy_setopt(ezcurl, CURLOPT_WRITEDATA, &arg_for_curl); #endif if (curle == CURLE_OK) curle = curl_easy_setopt(ezcurl, CURLOPT_RANGE, range); if (curle == CURLE_OK) return sz; out_curl: err_curl(curle); return sz; } /* ---------------------------------------------------------------------- */ static int init(struct ulo_init *init_args) { int err; init_args->size = init_curl_and_size(); err = init_args->size; if (err != -1) err = ulo_init(init_args); return err; } static void usage(void) { fprintf(stderr, "%s" " [-b bitmap]" " [-c cache]" " [-t timeout]" " /dev/loopN url_for_fs_image_file\n" "and then, \"mount /dev/loopN /wherever/you/like\n", me); exit(EINVAL); } static int parse(int argc, char *argv[], struct ulo_init *init_args) { int opt; static char bitmap_def[] = "/tmp/123456.bitmap", cache_def[] = "/tmp/123456.cache"; while ((opt = getopt(argc, argv, "b:c:t:")) != -1) { switch (opt) { case 'b': init_args->path[ULO_BITMAP] = optarg; break; case 'c': init_args->path[ULO_CACHE] = optarg; break; case 't': errno = 0; timeout = strtol(optarg, NULL, 0); if (errno) { me = optarg; return ERANGE; } break; default: usage(); break; } } if (argc - 
optind != 2) { usage(); return EINVAL; } init_args->path[ULO_DEV] = argv[optind]; g_url = argv[optind + 1]; if (!init_args->path[ULO_CACHE]) { snprintf(cache_def, sizeof(cache_def) - 1, "/tmp/%d.cache", getpid()); init_args->path[ULO_CACHE] = cache_def; } if (!init_args->path[ULO_BITMAP]) { snprintf(bitmap_def, sizeof(bitmap_def) - 1, "/tmp/%d.bitmap", getpid()); init_args->path[ULO_BITMAP] = bitmap_def; } //Dbg("to %d, %s\n", timeout, g_url); return 0; } /* ---------------------------------------------------------------------- */ int main(int argc, char *argv[]) { int err; pid_t pid; struct ulo_init init_args = { .dev_flags = O_RDONLY }; me = argv[0]; err = parse(argc, argv, &init_args); if (!err) err = init(&init_args); if (!err) { pid = fork(); if (!pid) { err = ulo_loop(SIGUSR1, store, ezcurl); curl_easy_cleanup(ezcurl); } else if (pid > 0) sleep(1); } if (err && me) perror(me); return err; } umount.aufs.c000066400000000000000000000032661315652647700135350ustar00rootroot00000000000000/* * Copyright (C) 2010-2015 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * The main purpose of this script is calling auplink. 
*/ #include #include #include #include #include "au_util.h" int main(int argc, char *argv[]) { int err, i, j; struct mntent ent; char *mntpnt, *av[argc + 1]; if (argc < 2) { puts(AuVersion); errno = EINVAL; goto out; } mntpnt = argv[1]; err = au_proc_getmntent(mntpnt, &ent); if (err) AuFin("no such mount point"); if (!hasmntopt(&ent, "noplink")) { err = au_plink(mntpnt, AuPlink_FLUSH, AuPlinkFlag_OPEN | AuPlinkFlag_CLOEXEC, /*fd*/NULL); if (err) AuFin(NULL); } mng_fhsm(mntpnt, /*umount*/1); i = 0; av[i++] = "umount"; av[i++] = "-i"; for (j = 2; j < argc; j++) av[i++] = argv[j]; av[i++] = mntpnt; av[i] = NULL; j = sizeof(av) / sizeof(*av); if (i > j) AuFin("internal error, %d > %d\n", i, j); execv(UMOUNT_CMD, av); out: AuFin("umount"); return errno; } ver.c000066400000000000000000000025001315652647700120330ustar00rootroot00000000000000/* * Copyright (C) 2011-2017 Junjiro R. Okajima * * This program, aufs is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #undef NDEBUG #include #include #include #include #include "au_util.h" int main(int argc, char *argv[]) { int err; regex_t preg; const char *pat = "^4\\.(9|[1-9][0-9])"; /* aufs4.9 and later */ err = regcomp(&preg, pat, REG_EXTENDED | REG_NOSUB); assert(!err); /* internal error */ if (!regexec(&preg, AUFS_VERSION, 0, NULL, 0)) return 0; puts("Wrong version!\n" AuVersion ", but aufs is " AUFS_VERSION ".\n" "See README in detail and try git branch -a."); return -1; }